Add missing thirdparty files

Sajid
2024-09-08 17:16:32 +06:00
parent 458577aaee
commit 13ec7258e1
488 changed files with 1066961 additions and 1 deletion

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,7 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
// def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", [], IIC_FNCLEX>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;


@@ -0,0 +1,111 @@
//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the 3DNow! instruction set, which extends MMX to support
// floating point and also adds a few more random instructions for good measure.
//
//===----------------------------------------------------------------------===//
class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
: I<o, F, outs, ins, asm, pat>, Requires<[Has3DNow]> {
}
class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>, ThreeDNow {
let Constraints = "$src1 = $dst";
}
class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, ThreeDNow;
multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, bit Commutable = 0,
string Ver = ""> {
let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, string Ver = ""> {
def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>,
Sched<[sched]>;
def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
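// Illustrative note on how the defms below expand (names follow from the
// multiclass defs above): each defm produces a register-register and a
// register-memory record, e.g. "defm PAVGUSB" yields PAVGUSBrr
// ("pavgusb mm1, mm2") and PAVGUSBrm ("pavgusb mm1, [mem]"), both selected
// via the int_x86_3dnow_pavgusb intrinsic built by the !strconcat above.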
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>;
defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>;
defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>;
defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>;
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
let SchedRW = [WriteEMMS] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;
// When PREFETCHWT1 is supported we want to use it for everything but T0.
def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
}]>;
// Use PREFETCHWT1 for NTA, T2, T1.
def PrefetchWT1Level : ImmLeaf<i32, [{
return Imm < 3;
}]>;
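// Illustrative mapping of the two predicates above (locality is the third
// prefetch operand, 0 = NTA .. 3 = T0):
//   locality 3 (T0)          -> prefetchw (PrefetchWLevel always matches T0)
//   locality 0-2 (NTA/T2/T1) -> prefetchwt1 when available, else prefetchw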
let SchedRW = [WriteLoad] in {
let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
[(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
TB, Requires<[HasPrefetchW]>;
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
TB, Requires<[HasPREFETCHWT1]>;
}
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">;
defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">;
defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">;

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,116 @@
//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
// instructions.
//
//===----------------------------------------------------------------------===//
// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [Sched] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
TB, OpSize16;
def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
TB, OpSize32;
def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
(X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
SchedRW = [Sched.Folded, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize16;
def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize32;
def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
CondNode, EFLAGS))]>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass
// Conditional Moves.
defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>;
defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>;
defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>;
defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>;
defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>;
defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>;
defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>;
defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>;
defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>;
defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>;
defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>;
defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>;
defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>;
defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>;
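// Usage sketch (illustrative): after "cmp %esi, %edi", "cmovne %ecx, %eax"
// copies ECX into EAX only when ZF == 0; otherwise EAX keeps its old value,
// which is why $src1 is tied to $dst in the multiclass above.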
// SetCC instructions.
multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
let Uses = [EFLAGS] in {
def r : I<opc, MRMXr, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
TB, Sched<[WriteSETCC]>;
def m : I<opc, MRMXm, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
TB, Sched<[WriteSETCCStore]>;
} // Uses = [EFLAGS]
}
defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
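// Usage sketch (illustrative): after "cmp %esi, %edi", "sete %al" writes 1
// to AL when %edi == %esi and 0 otherwise; the "m" variants store the byte
// directly to memory instead.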
// SALC is an undocumented instruction. Information about it can be found
// here: http://www.rcollins.org/secrets/opcodes/SALC.html
// Sets AL to 0xFF if the carry flag is set, and to 0x00 otherwise.
let Uses = [EFLAGS], Defs = [AL], SchedRW = [WriteALU] in {
def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
}

File diff suppressed because it is too large.


@@ -0,0 +1,413 @@
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 jump, return, call, and related instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
// Return instructions.
//
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}", []>, OpSize16;
def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt", []>, OpSize16;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{l|f}", []>, OpSize32;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
"{l}ret{|f}q", []>, Requires<[In64BitMode]>;
def LRETW : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{w|f}", []>, OpSize16;
def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{l|f}\t$amt", []>, OpSize32;
def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{w|f}\t$amt", []>, OpSize16;
// The machine return-from-interrupt instructions. Sometimes we also need to
// perform a post-epilogue stack adjustment; codegen emits the pseudo form,
// which expands to include an SP adjustment if necessary.
def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", []>,
OpSize16;
def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>, OpSize32;
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>;
// let isCodeGenOnly = 1 in
// def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
// def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
}
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", [(br bb:$dst)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
"jmp\t$dst", []>, OpSize16;
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
"jmp\t$dst", []>, OpSize32;
}
}
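// Size sketch (illustrative): JMP_1 is the two-byte short form (EB rel8),
// while JMP_2/JMP_4 are the E9 rel16/rel32 forms, kept here only for the
// disassembler.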
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
[(X86brcond bb:$dst, Cond, EFLAGS)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
[]>, OpSize16, TB;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
[]>, TB, OpSize32;
}
}
}
defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the form with the address-size prefix is jcxz and the
// unprefixed form is jecxz.
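// Encoding sketch (illustrative): the opcode is 0xE3 in all three cases; in
// 32-bit mode "jecxz" is the plain 0xE3 form and "jcxz" is 0x67 0xE3, where
// 0x67 is the address-size prefix selecting CX.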
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jcxz\t$dst", []>, AdSize16, Requires<[Not64BitMode]>;
let Uses = [ECX] in
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jecxz\t$dst", []>, AdSize32;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jrcxz\t$dst", []>, AdSize64, Requires<[In64BitMode]>;
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst",
[(brind GR16:$dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>;
def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst",
[(brind (loadi16 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJumpLd]>;
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
[(brind GR32:$dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
[(brind (loadi32 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
[(brind GR64:$dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>,
Sched<[WriteJumpLd]>;
// Non-tracking jumps for IBT, use with caution.
let isCodeGenOnly = 1 in {
def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind GR16 : $dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>, NOTRACK;
def JMP16m_NT : I<0xFF, MRM4m, (outs), (ins i16mem : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind (loadi16 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP32r_NT : I<0xFF, MRM4r, (outs), (ins GR32 : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind GR32 : $dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>, NOTRACK;
def JMP32m_NT : I<0xFF, MRM4m, (outs), (ins i32mem : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind (loadi32 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP64r_NT : I<0xFF, MRM4r, (outs), (ins GR64 : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind GR64 : $dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>, NOTRACK;
def JMP64m_NT : I<0xFF, MRM4m, (outs), (ins i64mem : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind(loadi64 addr : $dst))]>,
Requires<[In64BitMode]>, Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"ljmp{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"ljmp{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;
let AsmVariantName = "att" in
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
// Loop instructions
let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
}
//===----------------------------------------------------------------------===//
// Call Instructions...
//
let isCall = 1 in
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let Uses = [ESP, SSP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
"call{l}\t$dst", []>, OpSize32,
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
let hasSideEffects = 0 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
(outs), (ins i16imm_pcrel:$dst),
"call{w}\t$dst", []>, OpSize16,
Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
"call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst),
"call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
def CALL16r_NT : I<0xFF, MRM2r, (outs), (ins GR16 : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall GR16 : $dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL16m_NT : I<0xFF, MRM2m, (outs), (ins i16mem : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall(loadi16 addr : $dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
def CALL32r_NT : I<0xFF, MRM2r, (outs), (ins GR32 : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall GR32 : $dst)]>,
OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL32m_NT : I<0xFF, MRM2m, (outs), (ins i32mem : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall(loadi32 addr : $dst))]>,
OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
/*
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, SSP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TCRETURNri : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs),
(ins i32mem_TC:$dst, i32imm:$offset), []>;
// FIXME: These should be pseudo instructions that are lowered when going to
// MCInst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst), "jmp\t$dst", []>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
"jmp{l}\t{*}$dst", []>;
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, EFLAGS, SSP] in {
def TCRETURNdicc : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/
//===----------------------------------------------------------------------===//
// Call Instructions...
//
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", []>, OpSize32,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseRetpoline]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseRetpoline]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
def CALL64r_NT : I<0xFF, MRM2r, (outs), (ins GR64 : $dst),
"call{q}\t{*}$dst",[(X86NoTrackCall GR64 : $dst)]>,
Requires<[In64BitMode]>, NOTRACK;
def CALL64m_NT : I<0xFF, MRM2m, (outs), (ins i64mem : $dst),
"call{q}\t{*}$dst",
[(X86NoTrackCall(loadi64 addr : $dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall]>, NOTRACK;
}
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{q}\t{*}$dst", []>;
}
/*
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
def TCRETURNri64 : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
"jmp\t$dst", []>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"jmp{q}\t{*}$dst", []>;
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
let hasREX_WPrefix = 1 in {
def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
}
}
let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
Requires<[Not64BitMode,UseRetpoline]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
Requires<[In64BitMode,UseRetpoline]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def RETPOLINE_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
def RETPOLINE_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [RSP, EFLAGS, SSP] in {
def TCRETURNdi64cc : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset,
i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/


@@ -0,0 +1,204 @@
//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
//
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
let Defs = [AX], Uses = [AL] in // AX = signext(AL)
def CBW : I<0x98, RawFrm, (outs), (ins),
"{cbtw|cbw}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX], Uses = [AX] in // EAX = signext(AX)
def CWDE : I<0x98, RawFrm, (outs), (ins),
"{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX)
def CWD : I<0x99, RawFrm, (outs), (ins),
"{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX)
def CDQ : I<0x99, RawFrm, (outs), (ins),
"{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
def CDQE : RI<0x98, RawFrm, (outs), (ins),
"{cltq|cdqe}", []>, Sched<[WriteALU]>;
let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
def CQO : RI<0x99, RawFrm, (outs), (ins),
"{cqto|cqo}", []>, Sched<[WriteALU]>;
}
// Sign/Zero extenders
let hasSideEffects = 0 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>,
OpSize32, TB, Sched<[WriteALULd]>;
let hasSideEffects = 0 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))]>,
TB, OpSize32, Sched<[WriteALULd]>;
// These instructions exist as a consequence of the operand-size prefix having
// control of the destination size, but not the input size. Only support them
// for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
let mayLoad = 1 in {
def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable;
def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable;
} // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32rr8_NOREX : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
// operand that can never access an h register. If support for h registers
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR8:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR16:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR32:$src))]>,
Sched<[WriteALU]>, Requires<[In64BitMode]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
}
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
def : Pat<(i64 (zext GR8:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(zextloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(i64 (zext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def : Pat<(zextloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
// SUBREG_TO_REG to utilize implicit zero-extension; however, this isn't possible
// when the 32-bit value is defined by a truncate or is copied from something
// where the high bits aren't necessarily all zero. In such cases, we fall back
// to these explicit zext instructions.
def : Pat<(i64 (zext GR32:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
def : Pat<(i64 (zextloadi64i32 addr:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
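// Concrete example (illustrative): for "i64 %r = zext i32 %x", the MOV32rr
// pattern above emits "movl %eax, %eax"-style code; on x86-64 any 32-bit
// register write already clears bits 63:32 of the full register, so no
// explicit extension instruction is needed.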


@@ -0,0 +1,636 @@
//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes FMA (Fused Multiply-Add) instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
// For all FMA opcodes declared in fma3p_rm_* and fma3s_rm_* multiclasses
// defined below, both the register and memory variants are commutable.
// For the register form the commutable operands are 1, 2 and 3.
// For the memory variant the folded operand must be operand 3. Thus,
// in that case, only operands 1 and 2 can be swapped.
// Commuting some operands may require an opcode change.
// FMA*213*:
// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
// operands 1 and 3 (register forms only): *213* --> *231*;
// operands 2 and 3 (register forms only): *213* --> *132*.
// FMA*132*:
// operands 1 and 2 (memory & register forms): *132* --> *231*;
// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
// operands 2 and 3 (register forms only): *132* --> *213*.
// FMA*231*:
// operands 1 and 2 (memory & register forms): *231* --> *132*;
// operands 1 and 3 (register forms only): *231* --> *213*;
// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
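// Illustrative semantics of the three forms (operand 1 is also $dst):
//   FMA*213* dst, src2, src3:  dst = src2 * dst  + src3
//   FMA*132* dst, src2, src3:  dst = dst  * src3 + src2
//   FMA*231* dst, src2, src3:  dst = src2 * src3 + dst
// e.g. swapping operands 2 and 3 of a *213* op yields the *132* computation,
// matching the table above.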
multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256,
X86SchedWriteWidths sched> {
defm NAME#213#Suff : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#231#Suff : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#132#Suff : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#213#Suff#Y : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#231#Suff#Y : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#132#Suff#Y : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
}
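// Naming sketch (illustrative): instantiated as "defm VFMADD" with Suff = "PS",
// this multiclass yields VFMADD132PSr/m, VFMADD213PSr/m and VFMADD231PSr/m for
// XMM, plus the VEX_L "Y" variants (e.g. VFMADD213PSYr/m) for YMM.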
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32,
SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsub, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmaddsub,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsubadd,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmadd, v4f32, v8f32, SchedWriteFMA>;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmsub, v4f32, v8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// All source register operands of FMA opcodes defined in the fma3s_rm
// multiclass can be commuted. In many cases such a commute transformation
// requires an opcode adjustment; for example, commuting operands 1 and 2 of
// the FMA*132 form
// would require an opcode change to FMA*231:
// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
// -->
// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
// Please see the more detailed comment at the very beginning of the section
// defining FMA3 opcodes above.
multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
X86MemOperand x86memop, X86FoldableSchedWrite sched> {
defm NAME#213#Suff : fma3s_rm_213<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#231#Suff : fma3s_rm_231<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#132#Suff : fma3s_rm_132<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpNode, sched>;
}
// These FMA*_Int instructions are defined specially for use when the scalar
// FMA intrinsics are lowered to machine instructions; in that sense, they are
// similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc. instructions.
//
// All of the FMA*_Int opcodes are defined as commutable here.
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
// and the corresponding optimizations have been developed.
// Commuting the 1st operand of FMA*_Int requires some additional analysis:
// the commute optimization is legal only if all users of FMA*_Int use only
// the lowest element of the result. Even though such analysis may not be
// implemented yet, we let the routines doing the actual commute transformation
// decide whether a given instruction is commutable.
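// Example of why commuting operand 1 needs analysis (illustrative):
// "vfmadd213ss xmm1, xmm2, xmm3" computes only the low element into xmm1 and
// keeps xmm1's upper elements; swapping xmm1 with xmm2 changes which register
// supplies those upper bits, so the commute is safe only when every user
// reads just the lowest element of the result.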
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
// operands of the FMA 213 form.
// The FMA 231 form can be obtained only by commuting the 1st operand of the
// 213 or 132 forms, which is possible only after a special analysis of all
// uses of the initial instruction. Such analysis does not exist yet, so the
// 231 form of the FMA*_Int instructions is introduced under an optimistic
// assumption that such analysis will be implemented eventually.
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
RegisterClass RC, Operand memop,
X86FoldableSchedWrite sched> {
defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
memop, RC, sched>;
defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, RC, sched>;
defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
memop, RC, sched>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
FR32, f32mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS",
VR128, ssmem, sched>;
let ExeDomain = SSEPackedDouble in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
FR64, f64mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
VR128, sdmem, sched>, VEX_W;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub,
SchedWriteFMA.Scl>, VEX_LIG;
multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
SDNode Move, ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA, NoAVX512] in {
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
RC:$src3))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, RC:$src3,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3), RC:$src2))))),
(!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, (mem_frag addr:$src3),
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
}
}
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
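// Unlike FMA3, FMA4 is non-destructive (illustrative example):
// "vfmaddss xmm0, xmm1, xmm2, xmm3" computes xmm0 = xmm1 * xmm2 + xmm3 and
// leaves all three sources intact, which is why no "$src1 = $dst" constraint
// appears in the multiclasses below.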
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : FMA4S<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched]>;
def rm : FMA4S<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_LIG, FoldGenData<NAME#rr>, Sched<[sched]>;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
ReadAfterLd]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_LIG, FoldGenData<NAME#rr_Int>, Sched<[sched]>;
} // isCodeGenOnly = 1
}
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,
X86SchedWriteWidths sched> {
let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
VEX_W, Sched<[sched.XMM]>;
def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
Sched<[sched.XMM.Folded, ReadAfterLd,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
ReadAfterLd]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
VEX_W, VEX_L, Sched<[sched.YMM]>;
def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
Sched<[sched.XMM]>, FoldGenData<NAME#rr>;
def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_L, Sched<[sched.YMM]>, FoldGenData<NAME#Yrr>;
} // isCodeGenOnly = 1
}
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
}
multiclass scalar_fma4_patterns<SDNode Op, string Name,
ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
(!cast<Instruction>(Name#"rr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2,
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Name#"rm_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)), addr:$src3)>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, (mem_frag addr:$src2),
RC:$src3))))),
(!cast<Instruction>(Name#"mr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)), addr:$src2,
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
}
}
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;

View File

@@ -0,0 +1,748 @@
//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
[SDNPHasChain, SDNPInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//
def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
def fpimmneg0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-0.0);
}]>;
def fpimm1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+1.0);
}]>;
def fpimmneg1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-1.0);
}]>;
/*
// Some 'special' instructions - expanded after instruction selection.
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
*/
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32",
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
// In some cases there are additional variants with a mixture of different
// register sizes.
// The fourth instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
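// A minimal sketch (with a hypothetical "ffoo" node/mnemonic, not defined in
// this backend) of the pseudo/FPI pairing described above: the pseudo carries
// the selection pattern, the FPI carries only the encoding, and the FP
// stackifier rewrites the former into the latter:
//   def FOO_Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
//                         [(set RFP32:$dst, (ffoo RFP32:$src))]>;
//   def FOO_F    : FPI<0xD9, MRM_E6, (outs), (ins), "ffoo">;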
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
// Register op register -> register
// These are separated out because they have no reversed form.
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
[(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
[(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
[(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
}
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2))),
(set RFP32:$dst,
(OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2))),
(set RFP64:$dst,
(OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
(set RFP64:$dst,
(OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
} // mayLoad = 1, hasSideEffects = 1
}
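// A worked sketch of the Forward bit: with the default Forward = 1 the memory
// operand is the right-hand source, ST(0) = ST(0) OP [mem]; with Forward = 0
// the pattern operands are swapped, which is how the "reverse" forms below are
// expressed. For example, SUBR's _Fp32m variant matches
//   (set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1))
// i.e. ST(0) = [mem] - ST(0).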
let Defs = [FPSW] in {
// FPBinary_rr just defines pseudo-instructions, so there is no need to set
// scheduling resources.
let hasNoSchedulingInfo = 1 in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
defm DIV : FPBinary_rr<fdiv>;
}
// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<fadd, MRM0m, "add">;
defm SUB : FPBinary<fsub, MRM4m, "sub">;
defm SUBR : FPBinary<fsub, MRM5m, "subr", 0>;
}
let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
defm DIV : FPBinary<fdiv, MRM6m, "div">;
defm DIVR : FPBinary<fdiv, MRM7m, "divr", 0>;
}
} // Defs = [FPSW]
class FPST0rInst<Format fp, string asm>
: FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
class FPrST0Inst<Format fp, string asm>
: FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
class FPrST0PInst<Format fp, string asm>
: FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
let SchedRW = [WriteFAdd] in {
def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t$op">;
def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t$op">;
def SUBR_FST0r : FPST0rInst <MRM5r, "fsubr\t$op">;
def SUB_FrST0 : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
} // SchedRW
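// A note on the asm-string syntax used above: "{x|y}" selects between the
// AT&T (x) and Intel (y) variants. For example,
// "fsub{|r}\t{%st(0), $op|$op, st(0)}" with $op = %st(1) prints as
// "fsub %st(0), %st(1)" in AT&T syntax but "fsubr st(1), st(0)" in Intel
// syntax, matching the mnemonic swap described in the NOTE above.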
let SchedRW = [WriteFCom] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
let SchedRW = [WriteFMul] in {
def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t$op">;
def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t$op">;
} // SchedRW
let SchedRW = [WriteFDiv] in {
def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t$op">;
def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
def DIV_FST0r : FPST0rInst <MRM6r, "fdiv\t$op">;
def DIVR_FrST0 : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
} // SchedRW
// Unary operations.
multiclass FPUnary<SDNode OpNode, Format fp, string asmstring> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src))]>;
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}
let SchedRW = [WriteFSqrt80] in
defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
let SchedRW = [WriteMicrocoded] in {
defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}
let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
} // hasSideEffects
def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Defs = [FPSW]
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
let SchedRW = [WriteFComLd] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW
let SchedRW = [WriteMicrocoded] in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\ttbyte ptr $src">;
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\ttbyte ptr $dst">;
} // SchedRW
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
CondMovFP,
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
cc, EFLAGS))]>;
def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
CondMovFP,
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
cc, EFLAGS))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
CondMovFP,
[(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
cc, EFLAGS))]>,
Requires<[HasCMov]>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFCMOV] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
defm CMOVP : FPCMov<X86_COND_P>;
defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
"fcmovb\t{$op, %st(0)|st(0), $op}">;
def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
"fcmovbe\t{$op, %st(0)|st(0), $op}">;
def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
"fcmove\t{$op, %st(0)|st(0), $op}">;
def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
"fcmovu\t{$op, %st(0)|st(0), $op}">;
def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
"fcmovnb\t{$op, %st(0)|st(0), $op}">;
def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
"fcmovnbe\t{$op, %st(0)|st(0), $op}">;
def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
"fcmovne\t{$op, %st(0)|st(0), $op}">;
def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
"fcmovnu\t{$op, %st(0)|st(0), $op}">;
} // Predicates = [HasCMov]
} // SchedRW
// Floating point loads & stores.
let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
[(set RFP80:$dst, (loadf80 addr:$src))]>;
} // canFoldAsLoad
def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i64))]>;
} // SchedRW
let SchedRW = [WriteStore] in {
def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
[(store RFP32:$src, addr:$op)]>;
def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
[(truncstoref32 RFP64:$src, addr:$op)]>;
def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
[(store RFP64:$src, addr:$op)]>;
def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
[(truncstoref32 RFP80:$src, addr:$op)]>;
def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
[(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.
let mayStore = 1, hasSideEffects = 0 in {
def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
[(store RFP80:$src, addr:$op)]>;
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
} // SchedRW
let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's a FPStack op.
let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
}
// FP Stack manipulation instructions.
let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
}
// Floating point constant loads.
let isReMaterializable = 1, SchedRW = [WriteZero] in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm1)]>;
def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm0)]>;
def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm1)]>;
def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm0)]>;
def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
let SchedRW = [WriteFLD0] in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
let SchedRW = [WriteFLD1] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
let SchedRW = [WriteFLDC], Defs = [FPSW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
def FLDLG2 : I<0xD9, MRM_EC, (outs), (ins), "fldlg2", []>;
def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
} // SchedRW
} // Defs = [FPSW]
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucom\t$reg">;
def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucomp\t$reg">;
def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) with ST(1), pop, pop
(outs), (ins), "fucompp">;
}
let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucomi\t$reg">;
def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucompi\t$reg">;
}
let Defs = [EFLAGS, FPSW] in {
def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
}
} // SchedRW
// Floating point flag ops.
let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
let Defs = [FPSW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)]>;
} // SchedRW
let Defs = [FPSW], mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
Sched<[WriteLoad]>;
// FPU control instructions
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
def FPNCEST0r : FPI<0xD9, MRM3r, (outs RST:$op), (ins),
"fstpnce\t{%st(0), $op|$op, st(0)}">;
def FENI8087_NOP : I<0xDB, MRM_E0, (outs), (ins), "feni8087_nop", []>;
def FDISI8087_NOP : I<0xDB, MRM_E1, (outs), (ins), "fdisi8087_nop", []>;
// Clear exceptions
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
} // Defs = [FPSW]
} // SchedRW
// Operand-less floating-point instructions for the disassembler.
def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", []>, Sched<[WriteNop]>;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", []>;
def F2XM1 : I<0xD9, MRM_F0, (outs), (ins), "f2xm1", []>;
def FYL2X : I<0xD9, MRM_F1, (outs), (ins), "fyl2x", []>;
def FPTAN : I<0xD9, MRM_F2, (outs), (ins), "fptan", []>;
def FPATAN : I<0xD9, MRM_F3, (outs), (ins), "fpatan", []>;
def FXTRACT : I<0xD9, MRM_F4, (outs), (ins), "fxtract", []>;
def FPREM1 : I<0xD9, MRM_F5, (outs), (ins), "fprem1", []>;
def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
def FPREM : I<0xD9, MRM_F8, (outs), (ins), "fprem", []>;
def FYL2XP1 : I<0xD9, MRM_F9, (outs), (ins), "fyl2xp1", []>;
def FSINCOS : I<0xD9, MRM_FB, (outs), (ins), "fsincos", []>;
def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", []>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", []>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
} // Defs = [FPSW]
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// Required for RET of f32 / f64 / f80 values.
def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
// Required for CALLs which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
RFP80:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
// Used to convert i64 to f64 since there isn't an SSE version.
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;

View File

@@ -0,0 +1,993 @@
//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//
// Format specifies the encoding used by the instruction. This is part of the
// ad-hoc solution used to emit machine instruction encodings by our machine
// code emitter.
class Format<bits<7> val> {
bits<7> Value = val;
}
def Pseudo : Format<0>;
def RawFrm : Format<1>;
def AddRegFrm : Format<2>;
def RawFrmMemOffs : Format<3>;
def RawFrmSrc : Format<4>;
def RawFrmDst : Format<5>;
def RawFrmDstSrc : Format<6>;
def RawFrmImm8 : Format<7>;
def RawFrmImm16 : Format<8>;
def MRMDestMem : Format<32>;
def MRMSrcMem : Format<33>;
def MRMSrcMem4VOp3 : Format<34>;
def MRMSrcMemOp4 : Format<35>;
def MRMXm : Format<39>;
def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
def MRM6m : Format<46>; def MRM7m : Format<47>;
def MRMDestReg : Format<48>;
def MRMSrcReg : Format<49>;
def MRMSrcReg4VOp3 : Format<50>;
def MRMSrcRegOp4 : Format<51>;
def MRMXr : Format<55>;
def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
def MRM6r : Format<62>; def MRM7r : Format<63>;
def MRM_C0 : Format<64>; def MRM_C1 : Format<65>; def MRM_C2 : Format<66>;
def MRM_C3 : Format<67>; def MRM_C4 : Format<68>; def MRM_C5 : Format<69>;
def MRM_C6 : Format<70>; def MRM_C7 : Format<71>; def MRM_C8 : Format<72>;
def MRM_C9 : Format<73>; def MRM_CA : Format<74>; def MRM_CB : Format<75>;
def MRM_CC : Format<76>; def MRM_CD : Format<77>; def MRM_CE : Format<78>;
def MRM_CF : Format<79>; def MRM_D0 : Format<80>; def MRM_D1 : Format<81>;
def MRM_D2 : Format<82>; def MRM_D3 : Format<83>; def MRM_D4 : Format<84>;
def MRM_D5 : Format<85>; def MRM_D6 : Format<86>; def MRM_D7 : Format<87>;
def MRM_D8 : Format<88>; def MRM_D9 : Format<89>; def MRM_DA : Format<90>;
def MRM_DB : Format<91>; def MRM_DC : Format<92>; def MRM_DD : Format<93>;
def MRM_DE : Format<94>; def MRM_DF : Format<95>; def MRM_E0 : Format<96>;
def MRM_E1 : Format<97>; def MRM_E2 : Format<98>; def MRM_E3 : Format<99>;
def MRM_E4 : Format<100>; def MRM_E5 : Format<101>; def MRM_E6 : Format<102>;
def MRM_E7 : Format<103>; def MRM_E8 : Format<104>; def MRM_E9 : Format<105>;
def MRM_EA : Format<106>; def MRM_EB : Format<107>; def MRM_EC : Format<108>;
def MRM_ED : Format<109>; def MRM_EE : Format<110>; def MRM_EF : Format<111>;
def MRM_F0 : Format<112>; def MRM_F1 : Format<113>; def MRM_F2 : Format<114>;
def MRM_F3 : Format<115>; def MRM_F4 : Format<116>; def MRM_F5 : Format<117>;
def MRM_F6 : Format<118>; def MRM_F7 : Format<119>; def MRM_F8 : Format<120>;
def MRM_F9 : Format<121>; def MRM_FA : Format<122>; def MRM_FB : Format<123>;
def MRM_FC : Format<124>; def MRM_FD : Format<125>; def MRM_FE : Format<126>;
def MRM_FF : Format<127>;
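// The MRM_C0..MRM_FF formats denote instructions whose ModRM byte is the
// fixed value named by the hex suffix; e.g. FNSTSW16r (opcode 0xDF, MRM_E0)
// encodes as DF E0.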
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
class ImmType<bits<4> val> {
bits<4> Value = val;
}
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
def Imm8Reg : ImmType<3>; // Register encoded in [7:4].
def Imm16 : ImmType<4>;
def Imm16PCRel : ImmType<5>;
def Imm32 : ImmType<6>;
def Imm32PCRel : ImmType<7>;
def Imm32S : ImmType<8>;
def Imm64 : ImmType<9>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
class FPFormat<bits<3> val> {
bits<3> Value = val;
}
def NotFP : FPFormat<0>;
def ZeroArgFP : FPFormat<1>;
def OneArgFP : FPFormat<2>;
def OneArgFPRW : FPFormat<3>;
def TwoArgFP : FPFormat<4>;
def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
// Keep in sync with tables in X86InstrInfo.cpp.
class Domain<bits<2> val> {
bits<2> Value = val;
}
def GenericDomain : Domain<0>;
def SSEPackedSingle : Domain<1>;
def SSEPackedDouble : Domain<2>;
def SSEPackedInt : Domain<3>;
// Class specifying the vector form used to decompress the 8-bit
// displacement.
class CD8VForm<bits<3> val> {
bits<3> Value = val;
}
def CD8VF : CD8VForm<0>; // v := VL
def CD8VH : CD8VForm<1>; // v := VL/2
def CD8VQ : CD8VForm<2>; // v := VL/4
def CD8VO : CD8VForm<3>; // v := VL/8
// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
def CD8VT8 : CD8VForm<7>; // v := 8
// Class specifying the prefix used as an opcode extension.
class Prefix<bits<3> val> {
bits<3> Value = val;
}
def NoPrfx : Prefix<0>;
def PD : Prefix<1>;
def XS : Prefix<2>;
def XD : Prefix<3>;
def PS : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
// that other instructions with this opcode use PD/XS/XD
// and if any of those is not supported they shouldn't
// decode to this instruction. e.g. ANDSS/ANDSD don't
// exist, but the 0xf2/0xf3 encoding shouldn't
// decode to ANDPS.
// Class specifying the opcode map.
class Map<bits<3> val> {
bits<3> Value = val;
}
def OB : Map<0>;
def TB : Map<1>;
def T8 : Map<2>;
def TA : Map<3>;
def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
// Class specifying the encoding
class Encoding<bits<2> val> {
bits<2> Value = val;
}
def EncNormal : Encoding<0>;
def EncVEX : Encoding<1>;
def EncXOP : Encoding<2>;
def EncEVEX : Encoding<3>;
// Operand size for encodings that change based on mode.
class OperandSize<bits<2> val> {
bits<2> Value = val;
}
def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
// Address size for encodings that change based on mode.
class AddressSize<bits<2> val> {
bits<2> Value = val;
}
def AdSizeX : AddressSize<0>; // Address size determined using addr operand.
def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize16 { OperandSize OpSize = OpSize16; }
class OpSize32 { OperandSize OpSize = OpSize32; }
class AdSize16 { AddressSize AdSize = AdSize16; }
class AdSize32 { AddressSize AdSize = AdSize32; }
class AdSize64 { AddressSize AdSize = AdSize64; }
class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
class T8 { Map OpMap = T8; }
class TA { Map OpMap = TA; }
class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
class XD : TB { Prefix OpPrefix = XD; }
class XS : TB { Prefix OpPrefix = XS; }
class T8PS : T8 { Prefix OpPrefix = PS; }
class T8PD : T8 { Prefix OpPrefix = PD; }
class T8XD : T8 { Prefix OpPrefix = XD; }
class T8XS : T8 { Prefix OpPrefix = XS; }
class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bits<2> VEX_WPrefix = 1; }
class VEX_WIG { bits<2> VEX_WPrefix = 2; }
// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
// FIXME: We should consider adding separate bits for VEX_WIG and the extra
// part of W1X. This would probably simplify the tablegen emitters and
// the TSFlags creation below.
class VEX_W1X { bits<2> VEX_WPrefix = 3; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
class NOTRACK { bit hasNoTrackPrefix = 1; }
// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
class EVEX_CD8<int esize, CD8VForm form> {
int CD8_EltSize = !srl(esize, 3);
bits<3> CD8_Form = form.Value;
}
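// A worked example: EVEX_CD8<32, CD8VF> yields CD8_EltSize = !srl(32, 3) = 4
// bytes with the full-vector form, feeding the CD8_Scale computation in
// X86Inst below.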
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
// Specify the alternative register form instruction to replace the current
// instruction in case it was picked during generation of memory folding tables.
class FoldGenData<string _RegisterForm> {
string FoldGenRegForm = _RegisterForm;
}
// Provide a specific instruction to be used by the EVEX2VEX conversion.
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
// Mark the instruction as "illegal to memory fold/unfold"
class NotMemoryFoldable { bit isMemoryFoldable = 0; }
// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
bits<8> Opcode = opcod;
Format Form = f;
bits<7> FormBits = Form.Value;
ImmType ImmT = i;
dag OutOperandList = outs;
dag InOperandList = ins;
string AsmString = AsmStr;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
//
// Attributes specific to X86 instructions...
//
bit ForceDisassemble = 0; // Force instruction to disassemble even though it's
// isCodeGenOnly. Needed to hide an ambiguous
// AsmString from the parser, but still disassemble.
OperandSize OpSize = OpSizeFixed; // Does this instruction's encoding change
// based on operand size of the mode?
bits<2> OpSizeBits = OpSize.Value;
AddressSize AdSize = AdSizeX; // Does this instruction's encoding change
// based on address size of the mode?
bits<2> AdSizeBits = AdSize.Value;
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
bits<3> OpMapBits = OpMap.Value;
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
Domain ExeDomain = d;
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit?
bit hasEVEX_K = 0; // Does this inst require masking?
bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field?
bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field?
bit hasEVEX_B = 0; // Does this inst set the EVEX_B field?
bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width.
// Declare it int rather than bits<4> so that all bits are defined when
// assigning to bits<7>.
int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes.
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
bit hasNoTrackPrefix = 0; // Does this inst have a 0x3E (NoTrack) prefix?
bits<2> EVEX_LL;
let EVEX_LL{0} = hasVEX_L;
let EVEX_LL{1} = hasEVEX_L2;
// Vector size in bytes.
bits<7> VectSize = !shl(16, EVEX_LL);
// The scaling factor for AVX512's compressed displacement is either
// - the size of a power-of-two number of elements or
// - the size of a single element for broadcasts or
// - the total vector size divided by a power-of-two number.
// Possible values are: 0 (non-AVX512 inst), 1, 2, 4, 8, 16, 32 and 64.
bits<7> CD8_Scale = !if (!eq (OpEnc.Value, EncEVEX.Value),
!if (CD8_Form{2},
!shl(CD8_EltSize, CD8_Form{1-0}),
!if (hasEVEX_B,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
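// Worked examples of the above: a 512-bit instruction (VectSize = 64) tagged
// EVEX_CD8<32, CD8VF> gets CD8_Scale = !srl(64, 0) = 64; its EVEX_B broadcast
// form is instead scaled by the element size, 4; a two-element tuple form
// (CD8VT2) would give !shl(4, 1) = 8.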
// Used in the memory folding generation (TableGen backend) to point to an alternative
// instruction to replace the current one in case it got picked during generation.
string FoldGenRegForm = ?;
// Used to prevent an explicit EVEX2VEX override for this instruction.
string EVEX2VEXOverride = ?;
bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
let TSFlags{10-9} = AdSizeBits;
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
let TSFlags{15-13} = OpMapBits;
let TSFlags{16} = hasREX_WPrefix;
let TSFlags{20-17} = ImmT.Value;
let TSFlags{23-21} = FPForm.Value;
let TSFlags{24} = hasLockPrefix;
let TSFlags{25} = hasREPPrefix;
let TSFlags{27-26} = ExeDomain.Value;
let TSFlags{29-28} = OpEncBits;
let TSFlags{37-30} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
let TSFlags{38} = VEX_WPrefix{0};
let TSFlags{39} = hasVEX_4V;
let TSFlags{40} = hasVEX_L;
let TSFlags{41} = hasEVEX_K;
let TSFlags{42} = hasEVEX_Z;
let TSFlags{43} = hasEVEX_L2;
let TSFlags{44} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
let TSFlags{51-45} = CD8_Scale;
let TSFlags{52} = hasEVEX_RC;
let TSFlags{53} = hasNoTrackPrefix;
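// A worked packing example: an instruction with Form = MRMSrcReg (49) and
// OpMap = TB (1) stores 0b0110001 in TSFlags{6-0} and 0b001 in TSFlags{15-13}.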
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32S, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm64, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: PseudoI<outs, ins, pattern> {
let FPForm = fp;
}
// Templates for instructions that use a 16- or 32-bit segmented address as
// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
//
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
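// An illustrative (hypothetical "foo") instantiation: adding VEX to an
// XS-prefixed SI instruction flips its predicate from [UseSSE1] to [UseAVX]
// and prepends "v" to the mnemonic:
//   def FOOrr  : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
//                   "foo\t{$src, $dst|$dst, $src}", []>, XS;      // [UseSSE1]
//   def VFOOrr : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
//                   "foo\t{$src, $dst|$dst, $src}", []>, XS, VEX; // [UseAVX], "vfoo"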
// SI_Int - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
[UseSSE2])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// PI - SSE 1 & 2 packed instructions
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// MMXPI - SSE 1 & 2 packed instructions with MMX operands
class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
[HasMMX, HasSSE1]);
}
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SSE1 Instruction Templates:
//
// SSI   - SSE1 instructions with XS prefix.
// SSIi8 - SSE1 instructions with ImmT == Imm8 and XS prefix.
// PSI   - SSE1 instructions with PS prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
// VSSI  - SSE1 instructions with XS prefix in AVX form.
// VPSI  - SSE1 instructions with PS prefix in AVX form, packed single.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
Requires<[HasAVX]>;
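// Illustrative sketch (not part of this file): a packed-single SSE1 op
// written against PSI; opcode 0x58 is addps, and PSI supplies the PS
// prefix, the SSEPackedSingle domain, and the [UseSSE1] predicate.
// def ADDPSrr_sketch : PSI<0x58, MRMSrcReg, (outs VR128:$dst),
//                          (ins VR128:$src1, VR128:$src2),
//                          "addps\t{$src2, $dst|$dst, $src2}", []>;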
// SSE2 Instruction Templates:
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
// S2SI - SSE2 instructions with XS prefix.
// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with PD prefix, packed double domain.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
// VS2SI - SSE2 scalar instructions with XS prefix in AVX form.
// VPDI - SSE2 vector instructions with PD prefix in AVX form,
// packed double domain.
// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
// S2I - SSE2 scalar instructions with PD prefix.
// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
// MMX operands.
// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
PD, Requires<[HasAVX]>;
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
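// Illustrative sketch (not part of this file): the scalar-double analogue
// of the SSE1 example above, written against SDI; opcode 0x58 is addsd,
// and SDI supplies the XD prefix and the [UseSSE2] predicate.
// def ADDSDrr_sketch : SDI<0x58, MRMSrcReg, (outs FR64:$dst),
//                          (ins FR64:$src1, FR64:$src2),
//                          "addsd\t{$src2, $dst|$dst, $src2}", []>;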
// SSE3 Instruction Templates:
//
// S3I - SSE3 instructions with PD prefix.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit
// versions use the MMX registers and are grouped with the MMX classes; they
// need to remain enabled even when AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
Requires<[HasMMX, HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
Requires<[HasMMX, HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
// SS48I - SSE 4.1 instructions with T8 prefix.
// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used instead of 'UseSSE42' because CRC32 (the only
// user of SS42FI) has no AVX form and must stay available when AVX is enabled.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
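// Illustrative sketch (not part of this file): CRC32 is the sole user of
// SS42FI; its 32-bit reg/reg form looks roughly like this (the real
// definition in X86InstrSSE.td also ties $src1 to $dst and has a pattern).
// def CRC32r32r32_sketch : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
//                                 (ins GR32:$src1, GR32:$src2),
//                                 "crc32{l}\t{$src2, $src1|$src1, $src2}", []>;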
// SS42AI - SSE 4.2 instructions with TA prefix.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
// AVX8I - AVX instructions with T8PD prefix.
// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX]>;
// AVX2 Instruction Templates:
// Instructions introduced in AVX2 (no SSE equivalent forms)
//
// AVX28I - AVX2 instructions with T8PD prefix.
// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX2]>;
// AVX-512 Instruction Templates:
// Instructions introduced in AVX-512 (no SSE equivalent forms)
//
// AVX5128I - AVX-512 instructions with T8PD prefix.
// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
// AVX512PDI - AVX-512 instructions with PD, double packed.
// AVX512PSI - AVX-512 instructions with PS, single packed.
// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
// AVX512XSI - AVX-512 instructions with XS prefix, generic domain.
// AVX512BI - AVX-512 instructions with PD, int packed domain.
// AVX512SI - AVX-512 scalar instructions with PD prefix.
class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX512]>;
class AVX5128IBase : T8PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
Requires<[HasAVX512]>;
class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS,
Requires<[HasAVX512]>;
class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
Requires<[HasAVX512]>;
class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIBase : PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XSIi8Base : XS {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XDIi8Base : XD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
}
class AVX512PDIi8Base : PD {
Domain ExeDomain = SSEPackedDouble;
ImmType ImmT = Imm8;
}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[HasAVX512]>;
class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
class AVX512FMA3Base : T8PD, EVEX_4V;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;
// AES Instruction Templates:
//
// AES8I, AESAI
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[NoAVX, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[NoAVX, HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP9, Requires<[HasXOP]>;
// XOP 2 and 3 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 4 Operand Instruction Templates with imm byte
class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
VEX_4V, Requires<[HasXOP]>;
// X86-64 Instruction templates...
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, REX_W;
class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii16<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii64<o, F, outs, ins, asm, pattern>, REX_W;
class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: S2I<o, F, outs, ins, asm, pattern>, REX_W;
class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: VS2I<o, F, outs, ins, asm, pattern>, VEX_W;
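// Illustrative sketch (not part of this file): the R-prefixed templates
// simply add REX.W, so a 64-bit sign-extended-immediate move can be written
// as below (modeled on LLVM's MOV64ri32; the operand type here is an
// assumption, and the selection pattern is omitted).
// def MOV64ri32_sketch : RIi32S<0xC7, MRM0r, (outs GR64:$dst),
//                               (ins i64i32imm:$src),
//                               "mov{q}\t{$src, $dst|$dst, $src}", []>;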
// MMX Instruction templates
//
// MMXI   - MMX instructions with PS prefix.
// MMXI32 - MMX instructions with PS prefix, only valid outside 64-bit mode.
// MMXI64 - MMX instructions with PS prefix, only valid in 64-bit mode.
// MMXRI  - MMX instructions with PS prefix and REX.W.
// MMX2I  - MMX / SSE2 instructions with PD prefix.
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID  - MMX instructions with XD prefix.
// MMXIS  - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;


@@ -0,0 +1,612 @@
//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
// All instructions that use MMX should be in this file, even if they also use
// SSE.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// MMX Multiclasses
//===----------------------------------------------------------------------===//
// Alias instruction that maps zero vector to pxor mmx.
// This is expanded by ExpandPostRAPseudos to a pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
X86FoldableSchedWrite sched, bit Commutable = 0,
X86MemOperand OType = i64mem> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
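// Illustrative note (not part of this file): a defm such as
//   defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
//                                      SchedWriteVecALU.MMX, 1>;
// (see the arithmetic section below) expands to MMX_PADDBirr and
// MMX_PADDBirm, the memory form folding its operand through load_mmx.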
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2, X86FoldableSchedWrite sched,
X86FoldableSchedWrite schedImm> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
Sched<[schedImm]>;
}
}
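// Illustrative note (not part of this file): MMXI_binop_rmi_int produces
// three defs per defm -- e.g. MMX_PSRLW below yields MMX_PSRLWrr and
// MMX_PSRLWrm (opcode 0xD1, shift count in a register or memory) plus
// MMX_PSRLWri (opcode 0x71 /2, immediate shift count).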
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched> {
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched,
bit Commutable = 0> {
let isCommutable = Commutable in
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
}
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
X86FoldableSchedWrite sched> {
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
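// Illustrative note (not part of this file): this multiclass is
// instantiated once below as
//   defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b, ...>;
// which yields MMX_PALIGNRrri and MMX_PALIGNRrmi, each taking the byte-shift
// amount as an 8-bit immediate ($src3).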
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (Int SrcRC:$src))], d>,
Sched<[sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
Sched<[sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
(ins DstRC:$src1, SrcRC:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
Sched<[WriteCvtI2PS]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
Sched<[WriteCvtI2PS.Folded]>;
}
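// Illustrative note (not part of this file): the 3-address form above is
// used for cvtpi2ps (see MMX_CVTPI2PS below), which only writes the low
// 64 bits of its XMM destination -- hence the "$src1 = $dst" constraint
// wrapped around its defm.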
//===----------------------------------------------------------------------===//
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
let SchedRW = [WriteEMMS] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
//===----------------------------------------------------------------------===//
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))]>,
Sched<[WriteVecMoveFromGpr]>;
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))]>,
Sched<[WriteVecLoad]>;
let Predicates = [HasMMX] in {
def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (MMX_X86movw2d (i32 0))),
(MMX_SET0)>;
def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
(MMX_MOVD64rm addr:$src)>;
}
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteVecStore]>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
(MMX_X86movd2w (x86mmx VR64:$src)))]>,
Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))]>,
Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}",
[]>, Sched<[SchedWriteVecMoveLS.MMX.RM]>;
let isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert VR64:$src))]>,
Sched<[WriteVecMoveToGpr]>;
let SchedRW = [WriteVecMove], hasSideEffects = 0, isMoveReg = 1 in {
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MMX_MOVQ64rr">;
} // SchedRW, hasSideEffects, isMoveReg
} // isBitcast
// def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
// (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
(outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.MMX.MR]>;
let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
} // SchedRW
let SchedRW = [SchedWriteVecMoveLS.MMX.MR] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64
(scalar_to_vector
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[]>;
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[]>;
}
} // SchedRW
let Predicates = [HasMMX, HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
// movd to MMX register zero-extends
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(MMX_MOVD64rm addr:$src)>;
}
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
SchedWriteVecALU.MMX>;
defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
SchedWriteVecALU.MMX>;
defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
SchedWriteVecALU.MMX>;
// -- Addition
defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
SchedWriteVecALU.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
SchedWritePHAdd.MMX>;
defm MMX_PHADDD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
SchedWritePHAdd.MMX>;
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
SchedWritePHAdd.MMX>;
// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
SchedWriteVecALU.MMX>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
SchedWriteVecALU.MMX>;
defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
SchedWritePHAdd.MMX>;
// -- Multiplication
defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
SchedWriteVecIMul.MMX, 1>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw,
SchedWriteVecIMul.MMX>;
let Predicates = [HasMMX, HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
SchedWritePSADBW.MMX, 1>;
}
defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
SchedWriteVecALU.MMX>;
defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
SchedWriteVecALU.MMX>;
defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
SchedWriteVecALU.MMX>;
let Constraints = "$src1 = $dst" in
defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b,
SchedWriteShuffle.MMX>;
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
SchedWriteVecLogic.MMX, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
SchedWriteVecLogic.MMX>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
SchedWriteVecALU.MMX>;
// -- Unpack Instructions
defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
int_x86_mmx_punpckhbw,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
int_x86_mmx_punpckhwd,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
int_x86_mmx_punpckhdq,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
int_x86_mmx_punpckldq,
SchedWriteShuffle.MMX,
0, i32mem>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
SchedWriteShuffle.MMX>;
defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
SchedWriteShuffle.MMX>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
SchedWriteShuffle.MMX>;
// -- Shuffle Instructions
defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
SchedWriteVarShuffle.MMX>;
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
WriteCvtI2PD, SSEPackedDouble>, PD;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
}
// Extract / Insert
let Predicates = [HasMMX, HasSSE1] in
def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
imm:$src2))]>,
Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasMMX, HasSSE1] in {
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteVecInsert]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
(ins VR64:$src1, i16mem:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}
// Mask creation
let Predicates = [HasMMX, HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>,
Sched<[WriteMMXMOVMSK]>;
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
(x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
let Uses = [RDI], Predicates = [HasMMX, HasSSE1,In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
}
// 64-bit bit convert.
let Predicates = [HasMMX, HasSSE2] in {
def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
(MMX_CVTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(MMX_CVTPD2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
(MMX_CVTTPD2PIirr VR128:$src)>;
}


@@ -0,0 +1,80 @@
//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MPX instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// FIXME: Investigate a better scheduler class once MPX is used inside LLVM.
let SchedRW = [WriteSystem] in {
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
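// Illustrative note (not part of this file): the defm above expands to
// BNDMK32rm (outside 64-bit mode) and BNDMK64rm (64-bit mode); bndmk
// derives the bounds from the effective address of its memory operand.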
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
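// Illustrative note (not part of this file): each bound-check defm expands
// to four defs per mnemonic -- e.g. BNDCL32rm/BNDCL64rm and
// BNDCL32rr/BNDCL64rr -- checking an address held in a register or computed
// from a memory operand against bounds register $src1.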
def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayLoad = 1 in {
def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
}
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayStore = 1 in {
def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
"bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
}
let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
"bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
} // SchedRW


@@ -0,0 +1,30 @@
//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel SGX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SGX instructions
let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
"encls", []>, TB;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
"enclu", []>, TB;
// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
"enclv", []>, TB;
} // SchedRW


@@ -0,0 +1,63 @@
//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the AMD SVM instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SVM instructions
let SchedRW = [WriteSystem] in {
// 0F 01 D9
def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
// 0F 01 DC
def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
// 0F 01 DD
def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
// 0F 01 DE
let Uses = [EAX] in
def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB;
// 0F 01 D8
let Uses = [EAX] in
def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DA
let Uses = [EAX] in
def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DB
let Uses = [EAX] in
def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DF
let Uses = [EAX, ECX] in
def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%eax, %ecx|eax, ecx}", []>, TB, Requires<[Not64BitMode]>;
let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%rax, %ecx|rax, ecx}", []>, TB, Requires<[In64BitMode]>;
} // SchedRW


@@ -0,0 +1,755 @@
//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
// privileged modes. These are not typically used by the compiler, but are
// supported for the assembler and disassembler.
//
//===----------------------------------------------------------------------===//
let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
// CPU flow control instructions
let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
def UD1Wm : I<0xB9, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
"ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
def UD1Lm : I<0xB9, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
"ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
def UD1Qm : RI<0xB9, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
"ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
def UD1Wr : I<0xB9, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
"ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
def UD1Lr : I<0xB9, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
"ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
def UD1Qr : RI<0xB9, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
"ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
}
def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
// Interrupt and SysCall Instructions.
let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>, Requires<[Not64BitMode]>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
} // SchedRW
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
// def : InstAlias<"int\t$3", (INT3)>;
let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
def : Pat<(debugtrap),
(INT3)>, Requires<[NotPS4]>;
def : Pat<(debugtrap),
(INT (i8 0x41))>, Requires<[IsPS4]>;
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|al, dx}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins), "in{w}\t{%dx, %ax|ax, dx}", []>,
OpSize16;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", []>,
OpSize32;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
"in{b}\t{$port, %al|al, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{w}\t{$port, %ax|ax, $port}", []>, OpSize16;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{l}\t{$port, %eax|eax, $port}", []>, OpSize32;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins), "out{b}\t{%al, %dx|dx, al}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins), "out{w}\t{%ax, %dx|dx, ax}", []>,
OpSize16;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", []>,
OpSize32;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
"out{b}\t{%al, $port|$port, al}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{w}\t{%ax, $port|$port, ax}", []>, OpSize16;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{l}\t{%eax, $port|$port, eax}", []>, OpSize32;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
//let SchedRW = [WriteNop] in {
//def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
//def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
//def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
//def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
//def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
//def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
//} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from segment registers.
//
let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo.
let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
// i16mem operand in LSL32rm and GR32 operand in LSL32rr is not a typo.
let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
"str{w}\t$dst", []>, TB, OpSize16;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
"str{l}\t$dst", []>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
"str{q}\t$dst", []>, TB;
let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", []>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), "push{l}\t{%cs|cs}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), "push{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), "push{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), "push{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), "push{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins), "push{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins), "push{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", []>,
OpSize16, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), "push{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", []>,
OpSize16, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), "push{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), "push{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", []>,
OpSize16, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins), "pop{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins), "pop{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", []>,
OpSize16, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins), "pop{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins), "pop{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lds{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lss{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"les{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"les{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lfs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lgs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lgs\t{$src, $dst|$dst, $src}", []>, TB;
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in {
def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
"sgdt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SGDT32m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
"sgdt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>;
def SGDT64m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
"sgdt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SIDT16m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
"sidt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SIDT32m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
"sidt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>;
def SIDT64m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
"sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
"sldt{w}\t$dst", []>, TB, OpSize16;
let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{w}\t$dst", []>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
"sldt{l}\t$dst", []>, OpSize32, TB;
// LLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
"sldt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LGDT32m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LGDT64m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LIDT64m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
} // SchedRW
//===----------------------------------------------------------------------===//
// Specialized register support
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
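// rdmsr/wrmsr move the 64-bit MSR value through EDX:EAX (high:low half),
// with ECX selecting the MSR; the Uses/Defs lists above encode exactly that.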
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", []>, OpSize16, TB;
def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
"smsw{l}\t$dst", []>, OpSize32, TB;
// no m form encodable; use SMSW16m
def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
"smsw{q}\t$dst", []>, TB;
// For memory operands, there is only a 16-bit form
def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
"smsw{w}\t$dst", []>, TB;
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB;
// wbnoinvd is like wbinvd, except without invalidation
// encoding: like wbinvd + an 0xF3 prefix
def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd",
[(int_x86_wbnoinvd)]>, XS,
Requires<[HasWBNOINVD]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// CET instructions
// Use with caution; availability is not predicated on features.
let SchedRW = [WriteSystem] in {
let Uses = [SSP] in {
let Defs = [SSP] in {
def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
[(int_x86_incsspd GR32:$src)]>, XS;
def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src",
[(int_x86_incsspq GR64:$src)]>, XS;
} // Defs SSP
let Constraints = "$src = $dst" in {
def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"rdsspd\t$dst",
[(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS;
def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src),
"rdsspq\t$dst",
[(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS;
}
let Defs = [SSP] in {
def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp",
[(int_x86_saveprevssp)]>, XS;
def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src),
"rstorssp\t$src",
[(int_x86_rstorssp addr:$src)]>, XS;
} // Defs SSP
} // Uses SSP
def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrssd\t{$src, $dst|$dst, $src}",
[(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS;
def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrssq\t{$src, $dst|$dst, $src}",
[(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS;
def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrussd\t{$src, $dst|$dst, $src}",
[(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD;
def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrussq\t{$src, $dst|$dst, $src}",
[(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD;
let Defs = [SSP] in {
let Uses = [SSP] in {
def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy",
[(int_x86_setssbsy)]>, XS;
} // Uses SSP
def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src),
"clrssbsy\t$src",
[(int_x86_clrssbsy addr:$src)]>, XS;
} // Defs SSP
} // SchedRW
let SchedRW = [WriteSystem] in {
def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS;
def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS;
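// endbr64/endbr32 (f3 0f 1e fa / f3 0f 1e fb) sit in multi-byte NOP space and
// execute as NOPs on processors without CET-IBT, which is presumably why no
// feature predicate is attached to them here.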
} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins),
"xsetbv",
[(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
} // HasXSAVE
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave\t$dst",
[(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave64\t$dst",
[(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor\t$dst",
[(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor64\t$dst",
[(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt\t$dst",
[(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt64\t$dst",
[(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec\t$dst",
[(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec64\t$dst",
[(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves\t$dst",
[(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves64\t$dst",
[(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors\t$dst",
[(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors64\t$dst",
[(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
// def : InstAlias<"xstorerng", (XSTORE)>;
let SchedRW = [WriteSystem] in {
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB;
def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB;
def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB;
def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB;
def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB;
}
let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB;
def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB;
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
} // SchedRW
/*
//===----------------------------------------------------------------------===//
// PKU - enable protection key
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def WRPKRU : PseudoI<(outs), (ins GR32:$src),
[(int_x86_wrpkru GR32:$src)]>;
def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
[(set GR32:$dst, (int_x86_rdpkru))]>;
}
*/
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
let Uses = [EAX, ECX, EDX] in
def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
"rdfsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
"rdfsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
"rdgsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
"rdgsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS;
def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
"wrfsbase{l}\t$src",
[(int_x86_wrfsbase_32 GR32:$src)]>, XS;
def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
"wrfsbase{q}\t$src",
[(int_x86_wrfsbase_64 GR64:$src)]>, XS;
def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
"wrgsbase{l}\t$src",
[(int_x86_wrgsbase_32 GR32:$src)]>, XS;
def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
"wrgsbase{q}\t$src",
[(int_x86_wrgsbase_64 GR64:$src)]>, XS;
}
//===----------------------------------------------------------------------===//
// INVPCID Instruction
let SchedRW = [WriteSystem] in {
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}",
[(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD,
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW
let Predicates = [In64BitMode, HasINVPCID] in {
// In 64-bit mode the instruction can only take a 64-bit register as its
// register argument, while the intrinsic only accepts the corresponding
// 32-bit argument.
// The accepted values for now are 0,1,2,3 anyway (see Intel SDM -- INVPCID
// type), so it doesn't hurt us that one can't supply a 64-bit value here.
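// The MOV32rr copy below doubles as a zero-extension: writing a 32-bit
// register clears bits 63:32, so SUBREG_TO_REG can assert that the upper
// half of the 64-bit operand is zero.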
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
(INVPCID64
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
addr:$src2)>;
}
//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
} // Uses, Defs
} // SchedRW
//===----------------------------------------------------------------------===//
// TS flag control instruction.
let SchedRW = [WriteSystem] in {
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
}
//===----------------------------------------------------------------------===//
// IF (inside EFLAGS) management instructions.
let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
}
//===----------------------------------------------------------------------===//
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
"rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS,
Requires<[Not64BitMode, HasRDPID]>;
def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS,
Requires<[In64BitMode, HasRDPID]>;
} // SchedRW
let Predicates = [In64BitMode, HasRDPID] in {
// Due to silly instruction definition, we have to compensate for the
// instruction outputting a 64-bit register.
def : Pat<(int_x86_rdpid),
(EXTRACT_SUBREG (RDPID64), sub_32bit)>;
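// This is safe because rdpid reads IA32_TSC_AUX, whose defined value fits in
// 32 bits, so taking the low subregister of the 64-bit result loses nothing.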
}
//===----------------------------------------------------------------------===//
// PTWRITE Instruction - Write Data to a Processor Trace Packet
let SchedRW = [WriteSystem] in {
def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Platform Configuration instruction
// From ISA docs:
// "This instruction is used to execute functions for configuring platform
// features.
// EAX: Leaf function to be invoked.
// RBX/RCX/RDX: Leaf-specific purpose."
// "Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF,
// AF, OF, and SF are cleared. In case of failure, the failure reason is
// indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared."
// Thus all these mentioned registers are considered clobbered.
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
Requires<[HasPCONFIG]>;
} // SchedRW

View File

@@ -0,0 +1,60 @@
//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel TSX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TSX instructions
def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
let SchedRW = [WriteSystem] in {
//let usesCustomInserter = 1 in
//def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
// "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
// Requires<[HasRTM]>;
let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
"xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
"xbegin\t$dst", []>, OpSize32;
}
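// The branch target is the fallback path: on a transaction abort the
// processor resumes there with the abort status in EAX, hence Defs = [EAX].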
// Pseudo instruction to fake the definition of EAX on the fallback code path.
//let isPseudo = 1, Defs = [EAX] in {
//def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
//}
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
"xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW
// HLE prefixes
let SchedRW = [WriteSystem] in {
let isAsmParserOnly = 1 in {
def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
}
} // SchedRW

View File

@@ -0,0 +1,88 @@
//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// VMX instructions
let SchedRW = [WriteSystem] in {
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, PD;
// 0F 01 D4
def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, PS;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayStore
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayLoad
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
"vmxon\t$vmxon", []>, XS;
} // SchedRW

View File

@@ -0,0 +1,511 @@
//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// No op bitconverts
//===----------------------------------------------------------------------===//
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(COPY_TO_REGCLASS FR64:$src, VR128)>;
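// FR32 and FR64 alias the low lanes of the same XMM registers that back
// VR128, so the four patterns above are pure register-class copies and emit
// no code.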
//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//
// Patterns for insert_subvector/extract_subvector to/from index=0
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT,
SubRegIndex subIdx> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}
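// For illustration, the v4i32/v8i32 instantiation below expands to:
//   (extract_subvector (v8i32 VR256:$src), 0) -> (EXTRACT_SUBREG $src, sub_xmm)
//   (insert_subvector undef, (v4i32 $src), 0)
//     -> (INSERT_SUBREG (IMPLICIT_DEF), $src, sub_xmm)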
// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32, VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32, VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(alignedstore (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
def : Pat<(store (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
}
let Predicates = [HasAVX, NoVLX] in {
defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of lower 128-bits
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
v32i8, sub_xmm>;
// Special patterns for storing subvector extracts of lower 128-bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
v8i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
v16i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
v32i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
v64i8, sub_xmm>;
// Special patterns for storing subvector extracts of lower 256-bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
sub_ymm>;
defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
v8i64, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
v16i32, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
v32i16, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
v64i8, sub_ymm>;
}
// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, ValueType ZeroTy,
SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
(SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}
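// This is sound because VEX/EVEX-encoded 128/256-bit register moves zero the
// destination's upper bits, so VMOV*rr plus SUBREG_TO_REG already yields the
// subvector inserted on top of an all-zeros vector.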
let Predicates = [HasAVX, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}
let Predicates = [HasVLX] in {
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
return isMaskZeroExtended(N);
}]>;
def maskzeroupperv1i1 : maskzeroupper<v1i1, VK1>;
def maskzeroupperv2i1 : maskzeroupper<v2i1, VK2>;
def maskzeroupperv4i1 : maskzeroupper<v4i1, VK4>;
def maskzeroupperv8i1 : maskzeroupper<v8i1, VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;
// The patterns determine if we can depend on the upper bits of a mask register
// being zeroed by the previous operation so that we can skip explicit
// zeroing.
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv32i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK32:$src, VK64)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK16)>;
}
let Predicates = [HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
let Predicates = [HasVLX, HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK8)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK8)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK16)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK64)>;
}
// If the upper bits are not known to be zero, we have to fall back to
// explicit zeroing using shifts.
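// For example, placing a v2i1 mask into a zeroed v16i1 shifts the mask left
// by 14 and logically back right by 14, clearing bits 15:2 while preserving
// bits 1:0.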
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
(i8 15)), (i8 15))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
(i8 14)), (i8 14))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
(i8 12)), (i8 12))>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
(i8 8)), (i8 8))>;
}
let Predicates = [HasDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
(i8 7)), (i8 7))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
(i8 6)), (i8 6))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
(i8 4)), (i8 4))>;
}
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v32i1 VK32:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}
let Predicates = [HasBWI, NoDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
(i8 24)), (i8 24))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
(i8 56)), (i8 56))>;
}
let Predicates = [HasBWI, HasDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
(i8 31)), (i8 31))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
(i8 30)), (i8 30))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
(i8 28)), (i8 28))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
(i8 63)), (i8 63))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
(i8 62)), (i8 62))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}

View File

@@ -0,0 +1,446 @@
//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes XOP (eXtended OPerations)
//
//===----------------------------------------------------------------------===//
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
}
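// Each defm above instantiates both multiclass variants, e.g. VPHSUBWD
// yields VPHSUBWDrr (register source) and VPHSUBWDrm (i128mem source).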
// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
Operand memop, ComplexPattern mem_cpat,
X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
SchedWriteFRnd.XMM>;
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
SchedWriteFRnd.XMM>;
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
SchedWriteFRnd.YMM>;
}
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
(vt128 VR128:$src2))))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rr">;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}
multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
SchedWriteVecShiftImm.XMM>;
defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
SchedWriteVecShiftImm.XMM>;
defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
SchedWriteVecShiftImm.XMM>;
defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
SchedWriteVecShiftImm.XMM>;
}
// Instructions where the second source can be memory, but the third must be a register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
Sched<[sched]>;
def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd",
int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
defm VPMACSWW : xop4opm2<0x95, "vpmacsww",
int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
defm VPMACSWD : xop4opm2<0x96, "vpmacswd",
int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
defm VPMACSSWW : xop4opm2<0x85, "vpmacssww",
int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd",
int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd",
int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql",
int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh",
int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd",
int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}
// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v8i16 VR128:$src3))),
(VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
(bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
(v2i64 VR128:$src3))),
(VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
(v2i64 VR128:$src3))),
(VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}
// Transform to swizzle the immediate so that a pattern with the memory
// operand in the first source position can still be matched.
def CommuteVPCOMCC : SDNodeXForm<imm, [{
uint8_t Imm = N->getZExtValue() & 0x7;
Imm = X86::getSwappedVPCOMImm(Imm);
return getI8Imm(Imm, SDLoc(N));
}]>;
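// Illustrative example (wording assumed, not from the source): if DAG
// matching sees (X86vpcom (load addr), VR128:$src1, LT), the load sits in
// the wrong position for the "mi" form, so the pattern at the end of the
// multiclass below swaps the two sources and uses CommuteVPCOMCC to turn LT
// into GT, which preserves the comparison result.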
// Instructions where the second source can be memory and the third must be an imm8
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
imm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))),
imm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
NotMemoryFoldable;
}
}
def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
}
defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>;
defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>;
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// i128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rrr">;
}
let ExeDomain = SSEPackedInt in {
defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8,
SchedWriteVarShuffle.XMM>;
}
// Instructions where either the second or the third source can be memory
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rrr">;
}
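// Note (added for clarity): the (or (and $src3, $src1), (X86andnp $src3,
// $src2)) pattern above is a per-bit select. Wherever the selector $src3 has
// a 1 bit the result bit comes from $src1; wherever it has a 0 bit the
// result bit comes from $src2, which is exactly VPCMOV's behavior.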
let ExeDomain = SSEPackedInt in {
defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64,
SchedWriteShuffle.XMM>;
defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64,
SchedWriteShuffle.YMM>, VEX_L;
}
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
X86FoldableSchedWrite sched> {
def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched.Folded, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rr">;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
v2f64, loadv2f64, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
v4f64, loadv4f64, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
v4f32, loadv4f32, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
v8f32, loadv8f32, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}


@@ -0,0 +1,77 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the available hardware counters for various subtargets.
//
//===----------------------------------------------------------------------===//
let SchedModel = SandyBridgeModel in {
def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
def SBPort23Counter : PfmIssueCounter<SBPort23,
["uops_dispatched_port:port_2",
"uops_dispatched_port:port_3"]>;
def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
}
let SchedModel = HaswellModel in {
def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BroadwellModel in {
def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
}
let SchedModel = SkylakeClientModel in {
def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = SkylakeServerModel in {
def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BtVer2Model in {
def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
}


@@ -0,0 +1,17 @@
//=- X86RegisterBank.td - Describe the X86 Banks ---------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 register banks.
//
//===----------------------------------------------------------------------===//
/// General Purpose Registers: RAX, RCX,...
def GPRRegBank : RegisterBank<"GPR", [GR64]>;
/// Floating Point/Vector Registers
def VECRRegBank : RegisterBank<"VECR", [VR512]>;


@@ -0,0 +1,591 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
// aliases between the registers, and the register classes built out of the
// registers.
//
//===----------------------------------------------------------------------===//
class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
let Namespace = "X86";
let HWEncoding = Enc;
let SubRegs = subregs;
}
// Subregister indices.
let Namespace = "X86" in {
def sub_8bit : SubRegIndex<8>;
def sub_8bit_hi : SubRegIndex<8, 8>;
def sub_8bit_hi_phony : SubRegIndex<8, 8>;
def sub_16bit : SubRegIndex<16>;
def sub_16bit_hi : SubRegIndex<16, 16>;
def sub_32bit : SubRegIndex<32>;
def sub_xmm : SubRegIndex<128>;
def sub_ymm : SubRegIndex<256>;
}
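// For example (explanatory note): sub_8bit_hi is an 8-bit index at bit
// offset 8, so applied to AX (defined below) it selects AH, while sub_8bit
// selects AL.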
//===----------------------------------------------------------------------===//
// Register definitions...
//
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
// Dwarf numbering is different for 32-bit and 64-bit, and there are
// variations by target as well. Currently the first entry is for X86-64, the
// second for EH on X86-32/Darwin, and the third is the 'generic' one
// (X86-32/Linux and debug information on X86-32/Darwin).
// 8-bit registers
// Low registers
def AL : X86Reg<"al", 0>;
def DL : X86Reg<"dl", 2>;
def CL : X86Reg<"cl", 1>;
def BL : X86Reg<"bl", 3>;
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
def AH : X86Reg<"ah", 4>;
def DH : X86Reg<"dh", 6>;
def CH : X86Reg<"ch", 5>;
def BH : X86Reg<"bh", 7>;
// X86-64 only, requires REX.
let CostPerUse = 1 in {
def SIL : X86Reg<"sil", 6>;
def DIL : X86Reg<"dil", 7>;
def BPL : X86Reg<"bpl", 5>;
def SPL : X86Reg<"spl", 4>;
def R8B : X86Reg<"r8b", 8>;
def R9B : X86Reg<"r9b", 9>;
def R10B : X86Reg<"r10b", 10>;
def R11B : X86Reg<"r11b", 11>;
def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
}
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
def SIH : X86Reg<"", -1>;
def DIH : X86Reg<"", -1>;
def BPH : X86Reg<"", -1>;
def SPH : X86Reg<"", -1>;
def R8BH : X86Reg<"", -1>;
def R9BH : X86Reg<"", -1>;
def R10BH : X86Reg<"", -1>;
def R11BH : X86Reg<"", -1>;
def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
def HCX : X86Reg<"", -1>;
def HBX : X86Reg<"", -1>;
def HSI : X86Reg<"", -1>;
def HDI : X86Reg<"", -1>;
def HBP : X86Reg<"", -1>;
def HSP : X86Reg<"", -1>;
def HIP : X86Reg<"", -1>;
def R8WH : X86Reg<"", -1>;
def R9WH : X86Reg<"", -1>;
def R10WH : X86Reg<"", -1>;
def R11WH : X86Reg<"", -1>;
def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
}
// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : X86Reg<"ax", 0, [AL,AH]>;
def DX : X86Reg<"dx", 2, [DL,DH]>;
def CX : X86Reg<"cx", 1, [CL,CH]>;
def BX : X86Reg<"bx", 3, [BL,BH]>;
}
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
def SI : X86Reg<"si", 6, [SIL,SIH]>;
def DI : X86Reg<"di", 7, [DIL,DIH]>;
def BP : X86Reg<"bp", 5, [BPL,BPH]>;
def SP : X86Reg<"sp", 4, [SPL,SPH]>;
}
def IP : X86Reg<"ip", 0>;
// X86-64 only, requires REX.
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8W : X86Reg<"r8w", 8, [R8B,R8BH]>;
def R9W : X86Reg<"r9w", 9, [R9B,R9BH]>;
def R10W : X86Reg<"r10w", 10, [R10B,R10BH]>;
def R11W : X86Reg<"r11w", 11, [R11B,R11BH]>;
def R12W : X86Reg<"r12w", 12, [R12B,R12BH]>;
def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
def EAX : X86Reg<"eax", 0, [AX, HAX]>, DwarfRegNum<[-2, 0, 0]>;
def EDX : X86Reg<"edx", 2, [DX, HDX]>, DwarfRegNum<[-2, 2, 2]>;
def ECX : X86Reg<"ecx", 1, [CX, HCX]>, DwarfRegNum<[-2, 1, 1]>;
def EBX : X86Reg<"ebx", 3, [BX, HBX]>, DwarfRegNum<[-2, 3, 3]>;
def ESI : X86Reg<"esi", 6, [SI, HSI]>, DwarfRegNum<[-2, 6, 6]>;
def EDI : X86Reg<"edi", 7, [DI, HDI]>, DwarfRegNum<[-2, 7, 7]>;
def EBP : X86Reg<"ebp", 5, [BP, HBP]>, DwarfRegNum<[-2, 4, 5]>;
def ESP : X86Reg<"esp", 4, [SP, HSP]>, DwarfRegNum<[-2, 5, 4]>;
def EIP : X86Reg<"eip", 0, [IP, HIP]>, DwarfRegNum<[-2, 8, 8]>;
}
// X86-64 only, requires REX
let SubRegIndices = [sub_16bit, sub_16bit_hi], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8D : X86Reg<"r8d", 8, [R8W,R8WH]>;
def R9D : X86Reg<"r9d", 9, [R9W,R9WH]>;
def R10D : X86Reg<"r10d", 10, [R10W,R10WH]>;
def R11D : X86Reg<"r11d", 11, [R11W,R11WH]>;
def R12D : X86Reg<"r12d", 12, [R12W,R12WH]>;
def R13D : X86Reg<"r13d", 13, [R13W,R13WH]>;
def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;
// These also require REX.
let CostPerUse = 1 in {
def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>;
def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>;
def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
}}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;
// Pseudo Floating Point registers
def FP0 : X86Reg<"fp0", 0>;
def FP1 : X86Reg<"fp1", 0>;
def FP2 : X86Reg<"fp2", 0>;
def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
def FP7 : X86Reg<"fp7", 0>;
// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
let CostPerUse = 1 in {
def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>;
def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>;
def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse
// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
foreach Index = 0-31 in {
def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// ZMM Registers, used by AVX-512 instructions.
let SubRegIndices = [sub_ymm] in {
foreach Index = 0-31 in {
def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// Mask Registers, used by AVX-512 instructions.
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>;
def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>;
def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>;
def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>;
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
// Status flags register.
//
// Note that some flags that are commonly thought of as part of the status
// flags register are modeled separately. Typically this is due to instructions
// reading and updating those flags independently of all the others. We don't
// want to create false dependencies between these instructions and so we use
// a separate register to model them.
def EFLAGS : X86Reg<"flags", 0>;
// The direction flag.
def DF : X86Reg<"dirflag", 0>;
// Segment registers
def CS : X86Reg<"cs", 1>;
def DS : X86Reg<"ds", 3>;
def SS : X86Reg<"ss", 2>;
def ES : X86Reg<"es", 0>;
def FS : X86Reg<"fs", 4>;
def GS : X86Reg<"gs", 5>;
// Debug registers
def DR0 : X86Reg<"dr0", 0>;
def DR1 : X86Reg<"dr1", 1>;
def DR2 : X86Reg<"dr2", 2>;
def DR3 : X86Reg<"dr3", 3>;
def DR4 : X86Reg<"dr4", 4>;
def DR5 : X86Reg<"dr5", 5>;
def DR6 : X86Reg<"dr6", 6>;
def DR7 : X86Reg<"dr7", 7>;
def DR8 : X86Reg<"dr8", 8>;
def DR9 : X86Reg<"dr9", 9>;
def DR10 : X86Reg<"dr10", 10>;
def DR11 : X86Reg<"dr11", 11>;
def DR12 : X86Reg<"dr12", 12>;
def DR13 : X86Reg<"dr13", 13>;
def DR14 : X86Reg<"dr14", 14>;
def DR15 : X86Reg<"dr15", 15>;
// Control registers
def CR0 : X86Reg<"cr0", 0>;
def CR1 : X86Reg<"cr1", 1>;
def CR2 : X86Reg<"cr2", 2>;
def CR3 : X86Reg<"cr3", 3>;
def CR4 : X86Reg<"cr4", 4>;
def CR5 : X86Reg<"cr5", 5>;
def CR6 : X86Reg<"cr6", 6>;
def CR7 : X86Reg<"cr7", 7>;
def CR8 : X86Reg<"cr8", 8>;
def CR9 : X86Reg<"cr9", 9>;
def CR10 : X86Reg<"cr10", 10>;
def CR11 : X86Reg<"cr11", 11>;
def CR12 : X86Reg<"cr12", 12>;
def CR13 : X86Reg<"cr13", 13>;
def CR14 : X86Reg<"cr14", 14>;
def CR15 : X86Reg<"cr15", 15>;
// Pseudo index registers
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
// Bound registers, used in MPX instructions
def BND0 : X86Reg<"bnd0", 0>;
def BND1 : X86Reg<"bnd1", 1>;
def BND2 : X86Reg<"bnd2", 2>;
def BND3 : X86Reg<"bnd3", 3>;
// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
//
// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
// 64-bit mode. The main complication is that they cannot be encoded in an
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
let isAllocatable = 0 in
def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
R12BH, R13BH, R14BH, R15BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
(add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
R15WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 15)>;
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
R8, R9, R10, R11, RIP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH)> {
let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP)>;
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
// GR64_NOSP - GR64 registers except RSP (and RIP).
def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
(and GR32_NOREX, GR32_NOSP)>;
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86-64 ISA.
// In such cases, it is fine to use RIP as we are sure the high 32
// bits are not set. We do not need variants for NOSP as RIP is not
// allowed there.
// RIP is not spilled anywhere for now, so stick to 32-bit alignment
// to save on memory space.
// FIXME: We could allow all 64-bit registers, but we would need
// something to check that the high 32 bits are not set,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
// When RBP is used as a base pointer in a 32-bit addressing environment,
// it is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to 32-bit alignment to save
// on memory consumption.
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
// values as 64-bit quantities instead of 80-bit quantities, which is much much
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Special classes that help the assembly parser choose some alternate
// instructions to favor 2-byte VEX encodings.
def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 0, 7)>;
def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 8, 15)>;
def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 7)>;
def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 8, 15)>;
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,49 @@
//===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines scheduling predicate definitions that are common to
// all X86 subtargets.
//
//===----------------------------------------------------------------------===//
// A predicate used to identify dependency-breaking instructions that clear the
// content of the destination register. Note that this predicate only checks if
// input registers are the same. This predicate doesn't make any assumptions on
// the expected instruction opcodes, because different processors may implement
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
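// For example, in "pxor %xmm1, %xmm1" (PXORrr) operands 1 and 2 are both
// %xmm1, so CheckSameRegOperand<1, 2> holds and the instruction can be
// treated as a dependency-breaking zero idiom on processors that implement
// that idiom.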
// A predicate used to check if an instruction is a LEA, and if it uses all
// three source operands: base, index, and offset.
def IsThreeOperandsLEAPredicate: CheckAll<[
CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
// isRegOperand(Base)
CheckIsRegOperand<1>,
CheckNot<CheckInvalidRegOperand<1>>,
// isRegOperand(Index)
CheckIsRegOperand<3>,
CheckNot<CheckInvalidRegOperand<3>>,
// hasLEAOffset(Offset)
CheckAny<[
CheckAll<[
CheckIsImmOperand<4>,
CheckNot<CheckZeroOperand<4>>
]>,
CheckNonPortable<"MI.getOperand(4).isGlobal()">
]>
]>;
// This predicate evaluates to true only if the input machine instruction is a
// three-operand LEA. Tablegen automatically generates a new method for it in
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
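// Rough sketch (assumed shape, not verbatim) of what tablegen emits for the
// predicate above into X86GenInstrInfo:
//   bool isThreeOperandsLEA(const MachineInstr &MI) {
//     return (MI.getOpcode() == X86::LEA32r || /* ...other LEA opcodes... */) &&
//            MI.getOperand(1).isReg() && MI.getOperand(1).getReg() != 0 &&
//            MI.getOperand(3).isReg() && MI.getOperand(3).getReg() != 0 &&
//            ((MI.getOperand(4).isImm() && MI.getOperand(4).getImm() != 0) ||
//             MI.getOperand(4).isGlobal());
//   }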

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,661 @@
//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// InstrSchedModel annotations for out-of-order CPUs.
// Instructions with folded loads need to read the memory operand immediately,
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;
// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res, int UOps> {
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
}
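// Illustrative use (the processor model and numbers below are assumed, not
// taken from this file): give WriteALU a latency of 1 cycle, one resource
// cycle on a single port, and a single micro-op.
//   defm : X86WriteRes<WriteALU, [SBPort015], 1, [1], 1>;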
// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: with and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
// with a folded load.
class X86FoldableSchedWrite : SchedWrite {
// The SchedWrite to use when a load is folded into the instruction.
SchedWrite Folded;
}
// Multiclass that produces a linked pair of SchedWrites.
multiclass X86SchedWritePair {
// Register-Memory operation.
def Ld : SchedWrite;
// Register-Register operation.
def NAME : X86FoldableSchedWrite {
let Folded = !cast<SchedWrite>(NAME#"Ld");
}
}
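// For example, "defm WriteALU : X86SchedWritePair;" (see below) produces the
// register-register SchedWrite "WriteALU" together with its folded-load
// variant "WriteALULd", linked through the Folded field.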
// Helpers to mark SchedWrites as unsupported.
multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
let Unsupported = 1 in {
def : WriteRes<SchedRW, []>;
}
}
multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
let Unsupported = 1 in {
def : WriteRes<SchedRW, []>;
def : WriteRes<SchedRW.Folded, []>;
}
}
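// Illustrative use (assumed, as a scheduler model without AVX-512 might do):
//   defm : X86WriteResPairUnsupported<WriteFAddZ>;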
// Class that wraps an X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
X86FoldableSchedWrite s128,
X86FoldableSchedWrite s256,
X86FoldableSchedWrite s512> {
X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
X86FoldableSchedWrite MMX = sScl; // MMX operations.
X86FoldableSchedWrite XMM = s128; // XMM operations.
X86FoldableSchedWrite YMM = s256; // YMM operations.
X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}
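// Illustrative instantiation (assumed here; the concrete defs live further
// down in this file): bundle the scalar, XMM, YMM, and ZMM add writes into a
// single handle that instruction definitions can index by width.
//   def SchedWriteFAdd : X86SchedWriteWidths<WriteFAdd, WriteFAddX,
//                                            WriteFAddY, WriteFAddZ>;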
// Class that wraps X86SchedWriteWidths for each fp vector type.
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
X86SchedWriteWidths sPD> {
X86SchedWriteWidths PS = sPS;
X86SchedWriteWidths PD = sPD;
}
// Class that wraps a move/load/store triple for a vector width.
class X86SchedWriteMoveLS<SchedWrite MoveRR,
SchedWrite LoadRM,
SchedWrite StoreMR> {
SchedWrite RR = MoveRR;
SchedWrite RM = LoadRM;
SchedWrite MR = StoreMR;
}
// Class that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
X86SchedWriteMoveLS s128,
X86SchedWriteMoveLS s256,
X86SchedWriteMoveLS s512> {
X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
X86SchedWriteMoveLS MMX = sScl; // MMX operations.
X86SchedWriteMoveLS XMM = s128; // XMM operations.
X86SchedWriteMoveLS YMM = s256; // YMM operations.
X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
}
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
defm WriteDiv16 : X86SchedWritePair;
defm WriteDiv32 : X86SchedWritePair;
defm WriteDiv64 : X86SchedWritePair;
defm WriteIDiv8 : X86SchedWritePair;
defm WriteIDiv16 : X86SchedWritePair;
defm WriteIDiv32 : X86SchedWritePair;
defm WriteIDiv64 : X86SchedWritePair;
defm WriteBSF : X86SchedWritePair; // Bit scan forward.
defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV : X86SchedWritePair; // Conditional move.
defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def WriteZero : SchedWrite;
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
def WriteFLD0 : SchedWrite;
def WriteFLD1 : SchedWrite;
def WriteFLDC : SchedWrite;
def WriteFLoad : SchedWrite;
def WriteFLoadX : SchedWrite;
def WriteFLoadY : SchedWrite;
def WriteFMaskedLoad : SchedWrite;
def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair;
defm WriteFHAddZ : X86SchedWritePair;
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddX : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair;
defm WritePHAddZ : X86SchedWritePair;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
def WriteVecLoadNT : SchedWrite;
def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;
def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair;
// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;
// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;
// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;
// Fence instructions.
def WriteFence : SchedWrite;
// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;
// Move/Load/Store wrappers.
def WriteFMoveLS
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
def WriteFMoveLSX
: X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
def WriteFMoveLSNTX
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
def SchedWriteFMoveLSNT
: X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
WriteFMoveLSNTY, WriteFMoveLSNTY>;
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
: X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
: X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
: X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
: X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
: X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
: X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
WriteFSqrtY, WriteFSqrtZ>;
def SchedWriteFSqrt64
: X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
: X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendZ>;
def SchedWriteCvtDQ2PD
: X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
: X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
: X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
: X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
WriteCvtPD2PSY, WriteCvtPD2PSZ>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
: X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
: X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWZ>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlendY, WriteVarBlendZ>;
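// Illustrative use: instruction definitions take one of these wrappers and
// select the field matching their vector width, e.g. Sched<[SchedWriteFAdd.XMM]>
// for a 128-bit packed add and Sched<[SchedWriteFAdd.YMM]> for its 256-bit
// form (assuming the Scl/XMM/YMM/ZMM fields of X86SchedWriteWidths).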
// Vector size wrappers.
def SchedWriteFAddSizes
: X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
: X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
: X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
: X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
: X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
: X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
: X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
// IssueWidth is analogous to the number of decode units. Core and its
// descendants, including Nehalem and SandyBridge, have 4 decoders.
// Resources beyond the decoder operate on micro-ops and are buffered
// so adjacent micro-ops don't directly compete.
//
// MicroOpBufferSize > 1 indicates that RAW dependencies can be
// decoded in the same cycle. The value 32 is a reasonable but arbitrary
// number of in-flight instructions.
//
// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
// indicates high latency opcodes. Alternatively, InstrItinData
// entries may be included here to define specific operand
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
// The GenericX86Model contains no instruction schedules
// and disables PostRAScheduler.
class GenericX86Model : SchedMachineModel {
let IssueWidth = 4;
let MicroOpBufferSize = 32;
let LoadLatency = 4;
let HighLatency = 10;
let PostRAScheduler = 0;
let CompleteModel = 0;
}
def GenericModel : GenericX86Model;
// Define a model with the PostRAScheduler enabled.
def GenericPostRAModel : GenericX86Model {
let PostRAScheduler = 1;
}
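// Illustrative (hypothetical) use from a CPU definition elsewhere:
//   def : ProcessorModel<"some-cpu", GenericPostRAModel, [FeatureX87]>;
// With CompleteModel = 0, the scheduler falls back to these defaults for any
// opcode that has no explicit schedule.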

View File

@@ -0,0 +1,917 @@
//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the schedule class data for the Intel Atom
// in-order (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//
//
// Scheduling information derived from the "Intel 64 and IA-32 Architectures
// Optimization Reference Manual", Chapter 13, Section 4.
// Atom machine model.
def AtomModel : SchedMachineModel {
let IssueWidth = 2; // Allows 2 instructions per scheduling group.
let MicroOpBufferSize = 0; // In-order execution, always hide latency.
let LoadLatency = 3; // Expected cycles, may be overridden.
let HighLatency = 30; // Expected, may be overridden.
// On the Atom, the throughput for taken branches is 2 cycles. For small
// simple loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
let PostRAScheduler = 1;
let CompleteModel = 0;
}
let SchedModel = AtomModel in {
// Functional Units
def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
// SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
// SIMD/FP: SIMD ALU, FP Adder
def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
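// For example, in "addw (%rsp), %ax" the AX input feeds the ALU stage after
// the load, so its value is only needed 3 cycles after the load's address
// operands; the ReadAdvance models that overlap.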
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> RRPorts,
list<ProcResourceKind> RMPorts,
int RRLat = 1, int RMLat = 1,
list<int> RRRes = [1],
list<int> RMRes = [1]> {
// Register variant: RRLat cycles on RRPorts.
def : WriteRes<SchedRW, RRPorts> {
let Latency = RRLat;
let ResourceCycles = RRRes;
}
// Memory variant: RMLat cycles on RMPorts.
def : WriteRes<SchedRW.Folded, RMPorts> {
let Latency = RMLat;
let ResourceCycles = RMRes;
}
}
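// For example,
//   defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
// (defined under Arithmetic below) expands, with the default latencies, to a
// 1-cycle WriteRes on AtomPort01 for the register form and a 1-cycle
// WriteRes<WriteALU.Folded> on AtomPort0 for the load-folded form.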
// A folded store needs a cycle on Port0 for the store data.
def : WriteRes<WriteRMW, [AtomPort0]>;
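// For illustration (as modeled in the instruction definition files), a
// memory-destination ALU op such as "addl %eax, (%rbx)" combines the
// load-folded ALU write with this WriteRMW for the store-back.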
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : X86WriteResPairUnsupported<WriteCRC32>;
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [AtomPort01]>;
def : WriteRes<WriteSETCCStore, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
IMUL64rmi8, IMUL64rmi32)>;
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [AtomPort0]>;
def : WriteRes<WriteStore, [AtomPort0]>;
def : WriteRes<WriteStoreNT, [AtomPort0]>;
def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteFence, [AtomPort0]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [AtomPort01]>;
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [AtomPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [AtomPort01], 6, [6], 1>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFLoadY>;
defm : X86WriteResUnsupported<WriteFMaskedLoad>;
defm : X86WriteResUnsupported<WriteFMaskedLoadY>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
defm : X86WriteResUnsupported<WriteFMaskedStore>;
defm : X86WriteResUnsupported<WriteFMaskedStoreY>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadY>;
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadNTY>;
defm : X86WriteResUnsupported<WriteVecMaskedLoad>;
defm : X86WriteResUnsupported<WriteVecMaskedLoadY>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : X86WriteResPairUnsupported<WritePMULLD>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : X86WriteResPairUnsupported<WritePHMINPOS>;
defm : X86WriteResPairUnsupported<WriteMPSAD>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
def : WriteRes<WriteVecExtract, [AtomPort0]>;
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WritePCmpIStrI>;
defm : X86WriteResPairUnsupported<WritePCmpIStrM>;
defm : X86WriteResPairUnsupported<WritePCmpEStrI>;
defm : X86WriteResPairUnsupported<WritePCmpEStrM>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
////////////////////////////////////////////////////////////////////////////////
// AES instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteAESIMC>;
defm : X86WriteResPairUnsupported<WriteAESKeyGen>;
defm : X86WriteResPairUnsupported<WriteAESDecEnc>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteCLMul>;
////////////////////////////////////////////////////////////////////////////////
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }
////////////////////////////////////////////////////////////////////////////////
// Special Cases.
////////////////////////////////////////////////////////////////////////////////
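// The AtomWrite<ports>_<latency> helpers below give per-instruction overrides:
// each SchedWriteRes fixes the port(s) and latency, and an InstRW binds it to
// an instruction list or regex.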
// Port0
def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr,
MOVSX64rr32)>;
def : SchedAlias<WriteALURMW, AtomWrite0_1>;
def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
"BT(C|R|S)?(16|32|64)(rr|ri8)")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
// Port0 and Port1
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1, 1];
}
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
POP16rmr, POP32rmr, POP64rmr,
PUSH16r, PUSH32r, PUSH64r,
PUSHi16, PUSHi32,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
"IRET(16|32|64)?")>;
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5, 5];
}
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
MOVSSrr, MOVSSrr_REV,
PSLLDQri, PSRLDQri)>;
def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
"MMX_PUNPCKH(BW|DQ|WD)irr")>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
"XCHG(8|16|32|64)(ar|rr)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
let ResourceCycles = [3];
}
def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
CMPSB, CMPSL, CMPSQ, CMPSW,
MOVSB, MOVSL, MOVSQ, MOVSW,
POP16rmm, POP32rmm, POP64rmm)>;
def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
"XCHG(8|16|32|64)rm",
"PH(ADD|SUB)Drr",
"MOV(S|Z)X16rm8",
"MMX_P(ADD|SUB)Qirm",
"MOV(UPS|UPD|DQU)rm",
"P(ADD|SUB)Qrm")>;
def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
let Latency = 4;
let ResourceCycles = [4];
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;
def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rrCL, SHRD16rrCL,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
"IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
let Latency = 7;
let ResourceCycles = [7];
}
def : InstRW<[AtomWrite01_7], (instrs AAD8i8)>;
def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWrite01_8], (instrs LOOPE,
PUSHA16, PUSHA32,
SHLD64rrCL, SHRD64rrCL,
FNSTCW16m)>;
def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
"(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
"CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
let Latency = 15;
let ResourceCycles = [15];
}
def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
let Latency = 18;
let ResourceCycles = [18];
}
def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
let Latency = 20;
let ResourceCycles = [20];
}
def : InstRW<[AtomWrite01_20], (instrs DAS)>;
def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
let Latency = 22;
let ResourceCycles = [22];
}
def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
let Latency = 23;
let ResourceCycles = [23];
}
def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
let Latency = 25;
let ResourceCycles = [25];
}
def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
let Latency = 26;
let ResourceCycles = [26];
}
def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
let Latency = 29;
let ResourceCycles = [29];
}
def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
let Latency = 30;
let ResourceCycles = [30];
}
def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
let Latency = 32;
let ResourceCycles = [32];
}
def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
let ResourceCycles = [45];
}
def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
let ResourceCycles = [46];
}
def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
let Latency = 48;
let ResourceCycles = [48];
}
def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
let Latency = 55;
let ResourceCycles = [55];
}
def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
let Latency = 59;
let ResourceCycles = [59];
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
let ResourceCycles = [63];
}
def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
let Latency = 68;
let ResourceCycles = [68];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
let ResourceCycles = [71];
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
INVLPG, INVLPGA32, INVLPGA64)>;
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
let Latency = 72;
let ResourceCycles = [72];
}
def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
let Latency = 74;
let ResourceCycles = [74];
}
def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
let Latency = 77;
let ResourceCycles = [77];
}
def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
let Latency = 78;
let ResourceCycles = [78];
}
def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
let ResourceCycles = [79];
}
def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
"LRETI?(L|Q|W)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
let ResourceCycles = [92];
}
def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
let Latency = 94;
let ResourceCycles = [94];
}
def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
let Latency = 99;
let ResourceCycles = [99];
}
def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
let Latency = 121;
let ResourceCycles = [121];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
let ResourceCycles = [127];
}
def : InstRW<[AtomWrite01_127], (instrs INT)>;
def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
let Latency = 130;
let ResourceCycles = [130];
}
def : InstRW<[AtomWrite01_130], (instrs INT3)>;
def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
let Latency = 140;
let ResourceCycles = [140];
}
def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
let Latency = 141;
let ResourceCycles = [141];
}
def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
let Latency = 146;
let ResourceCycles = [146];
}
def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
let Latency = 147;
let ResourceCycles = [147];
}
def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
let Latency = 168;
let ResourceCycles = [168];
}
def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
let Latency = 174;
let ResourceCycles = [174];
}
def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
let Latency = 183;
let ResourceCycles = [183];
}
def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
let Latency = 202;
let ResourceCycles = [202];
}
def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
} // SchedModel

View File

@@ -0,0 +1,682 @@
//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD btver2 (Jaguar) to support
// instruction scheduling and other instruction cost heuristics. Based on the
// AMD Software Optimization Guide for AMD Family 16h Processors and its
// instruction latency appendix.
//
//===----------------------------------------------------------------------===//
def BtVer2Model : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and btver2 can
// decode 2 instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 64; // Retire Control Unit
let LoadLatency = 5; // FPU load latency (worst case; integer loads take 3 cycles)
let HighLatency = 25;
let MispredictPenalty = 14; // Minimum branch misprediction penalty
let PostRAScheduler = 1;
// FIXME: SSE4/AVX is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = BtVer2Model in {
// Jaguar can issue up to 6 micro-ops in one cycle
def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handles FP->INT jam)
def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// The Integer PRF for Jaguar is 64 entries, and it holds the architectural and
// speculative versions of the 64-bit integer registers.
// Reference: www.realworldtech.com/jaguar/4/
//
// The processor always keeps the different parts of an integer register
// together. An instruction that writes to a part of a register will therefore
// have a false dependence on any previous write to the same register or any
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
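// For example, a write to AX carries a false dependence on an earlier write
// to EAX or RAX, because the renamer keeps all parts of a register together.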
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
def JFpuPRF : RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
// Reference: "Software Optimization Guide for AMD Family 16h Processors"
def JRCU : RetireControlUnit<64, 2>;
// Integer Pipe Scheduler
def JALU01 : ProcResGroup<[JALU0, JALU1]> {
let BufferSize=20;
}
// AGU Pipe Scheduler
def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
let BufferSize=12;
}
// Fpu Pipe Scheduler
def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
let BufferSize=18;
}
// Functional units
def JDiv : ProcResource<1>; // integer division
def JMul : ProcResource<1>; // integer multiplication
def JVALU0 : ProcResource<1>; // vector integer
def JVALU1 : ProcResource<1>; // vector integer
def JVIMUL : ProcResource<1>; // vector integer multiplication
def JSTC : ProcResource<1>; // vector store/convert
def JFPM : ProcResource<1>; // FP multiplication
def JFPA : ProcResource<1>; // FP addition
// Functional unit groups
def JFPX : ProcResGroup<[JFPA, JFPM]>;
def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// Register variant: Lat cycles on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
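// For example,
//   defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
// (defined under Arithmetic below) expands to a 1-cycle WriteRes on JALU01
// for the register form and a 4-cycle (1 + 3) WriteRes<WriteALU.Folded> on
// [JLAGU, JALU01] for the load-folded form.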
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// Register variant: Lat cycles on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
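// As a sketch (assuming a pair like the ones defined further below):
//   defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
// would give the register form 3 cycles on JFPU0/JFPA and the load-folded
// form 8 cycles (3 + 5 for the FP load), also occupying JLAGU for the load.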
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [2], int UOps = 2> {
// Register variant: Lat cycles on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
let NumMicroOps = UOps;
}
}
// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteIDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
// BMI1 BEXTR, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore, [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copied and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteSTMXCSR, [JSAGU]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteJump, [JALU01], 1>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : JWriteResFpuPair<WriteFDiv64, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : JWriteResFpuPair<WriteFSqrt64, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResFpuPair<WriteFSqrt64X, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResYMMPair<WriteFSqrt64Y, [JFPU1, JFPM], 54, [2, 54], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>;
defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecInsertLd, [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;
////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteINSERTQ : SchedWriteRes<[JFPU01, JVALU]> {
let Latency = 2;
let ResourceCycles = [1, 4];
}
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteVBROADCASTYLd : SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
def JWriteJVZEROALL : SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;
}
def : InstRW<[JWriteJVZEROALL], (instrs VZEROALL)>;
def JWriteJVZEROUPPER : SchedWriteRes<[]> {
let Latency = 46;
let NumMicroOps = 37;
}
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////
def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.
// Reference: Section 10.8 of the "Software Optimization Guide for AMD Family
// 15h Processors".
// Reference: Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// Section 21.8 [Dependency-breaking instructions].
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
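// Illustrative only: `xorl %eax, %eax` (XOR32rr with identical source
// registers) matches ZeroIdiomPredicate and is resolved at rename with zero
// latency, whereas `xorl %ecx, %eax` falls through to the normal WriteALU
// resources.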
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
}
// On Jaguar, a slow LEA is either a 3Ops LEA (base, index, offset), or an LEA
// with a `Scale` value different from 1.
def JSlowLEAPredicate : MCSchedPredicate<
CheckAny<[
// A 3-operand LEA (base, index, offset).
IsThreeOperandsLEAFn,
// An LEA with a "Scale" different from 1.
CheckAll<[
CheckIsImmOperand<2>,
CheckNot<CheckImmOperand<2, 1>>
]>
]>
>;
def JWriteLEA : SchedWriteVariant<[
SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
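// Illustrative only: `leal (%rax,%rcx,4), %edx` (scale != 1) and
// `leal 8(%rax,%rcx), %edx` (base + index + displacement) both take the slow
// JWrite3OpsLEA path, while `leal 8(%rax), %edx` keeps the fast WriteLEA path.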
def JSlowLEA16r : SchedWriteRes<[JALU01]> {
let Latency = 3;
let ResourceCycles = [4];
}
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
} // SchedModel


@@ -0,0 +1,486 @@
//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def SLMModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and SLM can decode 2
// instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 32; // Based on the reorder buffer.
let LoadLatency = 3;
let MispredictPenalty = 10;
let PostRAScheduler = 1;
// For small loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
// FIXME: SSE4 is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = SLMModel in {
// Silvermont has 5 reservation stations for micro-ops.
def SLM_IEC_RSV0 : ProcResource<1>;
def SLM_IEC_RSV1 : ProcResource<1>;
def SLM_FPC_RSV0 : ProcResource<1> { let BufferSize = 1; }
def SLM_FPC_RSV1 : ProcResource<1> { let BufferSize = 1; }
def SLM_MEC_RSV : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
def SLM_IEC_RSV01 : ProcResGroup<[SLM_IEC_RSV0, SLM_IEC_RSV1]>;
def SLM_FPC_RSV01 : ProcResGroup<[SLM_FPC_RSV0, SLM_FPC_RSV1]>;
def SLMDivider : ProcResource<1>;
def SLMFPMultiplier : ProcResource<1>;
def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
int LoadLat = 3> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
// the latency (default = 3).
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
let NumMicroOps = UOps;
}
}
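// For illustration only (assuming the usual WriteALU/WriteALULd pairing from
// the common X86 scheduling definitions), with the default LoadLat of 3,
//   defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
// expands to roughly:
//   def : WriteRes<WriteALU,   [SLM_IEC_RSV01]>              { let Latency = 1; }
//   def : WriteRes<WriteALULd, [SLM_MEC_RSV, SLM_IEC_RSV01]> { let Latency = 4; }
// Entries that feed the slow divider pass LoadLat explicitly (e.g. the
// WriteDiv* records below use LoadLat = 4).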
// A folded store needs a cycle on MEC_RSV for the store data, but it does not
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copied and pasted from WriteStore/Load.
def : WriteRes<WriteSTMXCSR, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLDMXCSR, [SLM_MEC_RSV]> { let Latency = 3; }
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME: Latency and NumMicroOps?
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
// Bit counts.
defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLDC, [SLM_FPC_RSV01], 1, [2], 2>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SLM_FPC_RSV0]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 13;
let ResourceCycles = [13, 1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SLM_FPC_RSV0]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 21;
let ResourceCycles = [21, 1];
}
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSKY, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESDecEncLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESIMC, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESIMCLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESKeyGen, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESKeyGenLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SLM_FPC_RSV0]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 10;
let ResourceCycles = [10, 1];
}
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence, [SLM_MEC_RSV]>;
def : WriteRes<WriteNop, []>;
// AVX/FMA is not supported on this architecture, but we should define the basic
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
} // SchedModel

File diff suppressed because it is too large


@@ -0,0 +1,459 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
"32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87", "HasX87", "true",
"Enable X87 float instructions">;
def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
"Enable NOPL instruction">;
def FeatureCMOV : SubtargetFeature<"cmov", "HasCMov", "true",
"Enable conditional move instructions">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
"Support fxsave/fxrestore instructions">;
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
"Support xsave instructions">;
def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
"Support xsaveopt instructions">;
def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
"Support xsavec instructions">;
def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
[FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
"Enable SSE3 instructions",
[FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
"Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
"Enable AVX instructions",
[FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
"Enable three-operand fused multiple-add",
[FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
"Enable AVX-512 Byte and Word Instructions",
[FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
"Enable AVX-512 further Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
"Enable Galois Field Arithmetic Instructions",
[FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
"Enable vpclmulqdq instructions",
[FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
"Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
"Promote selected AES instructions to AVX512/AVX registers",
[FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
"Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
"Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
// On recent X86 (port-bound) processors, it's preferable to combine into a single
// shuffle using a variable mask rather than multiple fixed shuffles.
def FeatureFastVariableShuffle
: SubtargetFeature<"fast-variable-shuffle",
"HasFastVariableShuffle",
"true", "Shuffles with variable masks are fast">;
// On some X86 processors, there is no performance hazard to writing only the
// lower parts of a YMM or ZMM register without clearing the upper part.
def FeatureFastPartialYMMorZMMWrite
: SubtargetFeature<"fast-partial-ymm-or-zmm-write",
"HasFastPartialYMMorZMMWrite",
"true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def FeatureFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
// The target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
// The target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB
: SubtargetFeature<
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
// Gather has been available since Haswell (the AVX2 feature set), so technically
// we can generate gathers on all AVX2 processors, but the overhead on HSW is
// high. The Skylake client processor has faster gathers than HSW, and its
// performance is similar to Skylake Server (AVX-512).
def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast.">;
def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
// Enable mitigation of some aspects of speculative execution related
// vulnerabilities by removing speculatable indirect branches. This disables
// jump-table formation, rewrites explicit `indirectbr` instructions into
// `switch` instructions, and uses a special construct called a "retpoline" to
// prevent speculation of the remaining indirect branches (indirect calls and
// tail calls).
def FeatureRetpoline
: SubtargetFeature<"retpoline", "UseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
"lowering them with a speculation blocking construct.">;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
"Enable retpoline, but with an externally provided thunk.",
[FeatureRetpoline]>;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "X86Schedule.td"
include "X86InstrInfo_reduce.td"
def X86InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Variant name.
string Name = "att";
// Discard comments in assembly strings.
string CommentDelimiter = "#";
// Recognize hard-coded registers.
string RegisterPrefix = "%";
}
def IntelAsmParserVariant : AsmParserVariant {
int Variant = 1;
// Variant name.
string Name = "intel";
// Discard comments in assembly strings.
string CommentDelimiter = ";";
// Recognize hard-coded registers.
string RegisterPrefix = "";
}
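// For reference (not part of the original file), the two variants accept the
// same instruction in different notations, e.g.
//   AT&T:  movl $1, %eax        # '%' register prefix, '#' comments
//   Intel: mov eax, 1           ; no register prefix, ';' comments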
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel}
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
let AllowRegisterRenaming = 1;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
// def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", [], IIC_FNCLEX>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;

View File

@@ -0,0 +1,103 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;
// Capstone: comment out below lines for X86 Reduce mode
/*
// X87 Floating Point Stack.
include "X86InstrFPStack.td"
// SIMD support (SSE, MMX and AVX)
include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
// XOP
include "X86InstrXOP.td"
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrAVX512.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
// MPX instructions
include "X86InstrMPX.td"
//include "X86InstrTSX.td"
include "X86InstrSGX.td"
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
def : InstAlias<"fxch", (XCH_F ST1), 0>;
def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
(Inst RST:$op), EmitAlias>;
def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
(Inst ST0), EmitAlias>;
}
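// Expansion sketch (illustrative, not from the original file): instantiating
// FpUnaryAlias<"fadd", ADD_FST0r> produces aliases so that, e.g.,
//   fadd %st(4), %st(0)
// is accepted and matched as (ADD_FST0r ST4).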
defm : FpUnaryAlias<"fadd", ADD_FST0r>;
defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
defm : FpUnaryAlias<"fsub", SUB_FST0r>;
defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
defm : FpUnaryAlias<"fmul", MUL_FST0r>;
defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
defm : FpUnaryAlias<"fcompi", COM_FIPr>;
defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
// which supports this due to an old AMD documentation bug when 64-bit mode was
// created.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
(MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
(MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
*/

View File

@@ -0,0 +1,101 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;
// Capstone: comment out below lines for X86 Reduce mode
// X87 Floating Point Stack.
//include "X86InstrFPStack.td"
// SIMD support (SSE, MMX and AVX)
//include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
//include "X86InstrFMA.td"
// XOP
//include "X86InstrXOP.td"
// SSE, MMX and 3DNow! vector support.
//include "X86InstrSSE.td"
//include "X86InstrAVX512.td"
//include "X86InstrMMX.td"
//include "X86Instr3DNow.td"
// MPX instructions
//include "X86InstrMPX.td"
//include "X86InstrTSX.td"
//include "X86InstrSGX.td"
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
//def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
//def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
//def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
//def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
//def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
//def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
//def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
//def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
//def : InstAlias<"fxch", (XCH_F ST1), 0>;
//def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
//def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
//def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
//def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
//def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
//def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
//def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
//def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
(Inst RST:$op), EmitAlias>;
def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
(Inst ST0), EmitAlias>;
}
//defm : FpUnaryAlias<"fadd", ADD_FST0r>;
//defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
//defm : FpUnaryAlias<"fsub", SUB_FST0r>;
//defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
//defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
//defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
//defm : FpUnaryAlias<"fmul", MUL_FST0r>;
//defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
//defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
//defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
//defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
//defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
//defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
//defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
//defm : FpUnaryAlias<"fcompi", COM_FIPr>;
//defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
//def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
//def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
//def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
//def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
//def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
//def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
//
//def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
// which supports this due to an old AMD documentation bug when 64-bit mode was
// created.
//def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
// (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
//def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
// (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;

View File

@@ -0,0 +1,111 @@
//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the 3DNow! instruction set, which extends MMX to support
// floating point and also adds a few more random instructions for good measure.
//
//===----------------------------------------------------------------------===//
class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
: I<o, F, outs, ins, asm, pat>, Requires<[Has3DNow]> {
}
class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>, ThreeDNow {
let Constraints = "$src1 = $dst";
}
class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, ThreeDNow;
multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, bit Commutable = 0,
string Ver = ""> {
let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, string Ver = ""> {
def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>,
Sched<[sched]>;
def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>;
defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>;
defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>;
defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>;
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
let SchedRW = [WriteEMMS] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;
// When PREFETCHWT1 is supported, we want to use it for everything but T0.
def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
}]>;
// Use PREFETCHWT1 for NTA, T2, T1.
def PrefetchWT1Level : ImmLeaf<i32, [{
return Imm < 3;
}]>;
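// For reference (this mapping comes from the generic prefetch intrinsic and is
// an assumption here, not stated in this file): the locality immediate is
// 0 = NTA, 1 = T2, 2 = T1, 3 = T0. PrefetchWLevel therefore selects PREFETCHW
// for T0, or for every level when PREFETCHWT1 is unavailable, while
// PrefetchWT1Level (Imm < 3) routes NTA/T2/T1 to PREFETCHWT1.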
let SchedRW = [WriteLoad] in {
let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
[(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
TB, Requires<[HasPrefetchW]>;
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
TB, Requires<[HasPREFETCHWT1]>;
}
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">;
defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">;
defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,116 @@
//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
// instructions.
//
//===----------------------------------------------------------------------===//
// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [Sched] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
TB, OpSize16;
def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
TB, OpSize32;
def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
(X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
SchedRW = [Sched.Folded, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize16;
def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize32;
def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
CondNode, EFLAGS))]>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass
// Conditional Moves.
defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>;
defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>;
defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>;
defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>;
defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>;
defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>;
defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>;
defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>;
defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>;
defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>;
defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>;
defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>;
defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>;
defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>;
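// Usage sketch (illustrative, not from the original file): a branchless signed
// max of EDI and ESI built from the definitions above:
//   cmpl   %esi, %edi
//   cmovll %esi, %edi        // EDI = (EDI < ESI) ? ESI : EDI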
// SetCC instructions.
multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
let Uses = [EFLAGS] in {
def r : I<opc, MRMXr, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
TB, Sched<[WriteSETCC]>;
def m : I<opc, MRMXm, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
TB, Sched<[WriteSETCCStore]>;
} // Uses = [EFLAGS]
}
defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
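// Usage sketch (illustrative, not from the original file): materializing the
// boolean (EDI == ESI) with the definitions above:
//   cmpl   %esi, %edi
//   sete   %al               // AL = 1 if equal, else 0
//   movzbl %al, %eax         // zero-extend to a full register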
// SALC is an undocumented instruction. Information about it can be found here:
// http://www.rcollins.org/secrets/opcodes/SALC.html
// Set AL if carry.
let Uses = [EFLAGS], Defs = [AL], SchedRW = [WriteALU] in {
def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,413 @@
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 jump, return, call, and related instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
// Return instructions.
//
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}", []>, OpSize16;
def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt", []>, OpSize16;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{l|f}", []>, OpSize32;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
"{l}ret{|f}q", []>, Requires<[In64BitMode]>;
def LRETW : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{w|f}", []>, OpSize16;
def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{l|f}\t$amt", []>, OpSize32;
def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{w|f}\t$amt", []>, OpSize16;
// The machine return from interrupt instruction, but sometimes we need to
// perform a post-epilogue stack adjustment. Codegen emits the pseudo form
// which expands to include an SP adjustment if necessary.
def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", []>,
OpSize16;
def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>, OpSize32;
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>;
// let isCodeGenOnly = 1 in
// def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
// def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
}
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", [(br bb:$dst)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
"jmp\t$dst", []>, OpSize16;
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
"jmp\t$dst", []>, OpSize32;
}
}
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
[(X86brcond bb:$dst, Cond, EFLAGS)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
[]>, OpSize16, TB;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
[]>, TB, OpSize32;
}
}
}
defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jcxz\t$dst", []>, AdSize16, Requires<[Not64BitMode]>;
let Uses = [ECX] in
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jecxz\t$dst", []>, AdSize32;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jrcxz\t$dst", []>, AdSize64, Requires<[In64BitMode]>;
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst",
[(brind GR16:$dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>;
def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst",
[(brind (loadi16 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJumpLd]>;
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
[(brind GR32:$dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
[(brind (loadi32 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
[(brind GR64:$dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>,
Sched<[WriteJumpLd]>;
// Non-tracking jumps for IBT; use with caution.
let isCodeGenOnly = 1 in {
def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind GR16 : $dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>, NOTRACK;
def JMP16m_NT : I<0xFF, MRM4m, (outs), (ins i16mem : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind (loadi16 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP32r_NT : I<0xFF, MRM4r, (outs), (ins GR32 : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind GR32 : $dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>, NOTRACK;
def JMP32m_NT : I<0xFF, MRM4m, (outs), (ins i32mem : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind (loadi32 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP64r_NT : I<0xFF, MRM4r, (outs), (ins GR64 : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind GR64 : $dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>, NOTRACK;
def JMP64m_NT : I<0xFF, MRM4m, (outs), (ins i64mem : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind(loadi64 addr : $dst))]>,
Requires<[In64BitMode]>, Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"ljmp{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"ljmp{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;
let AsmVariantName = "att" in
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
// Loop instructions
let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
}
//===----------------------------------------------------------------------===//
// Call Instructions...
//
let isCall = 1 in
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let Uses = [ESP, SSP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
"call{l}\t$dst", []>, OpSize32,
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
let hasSideEffects = 0 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
(outs), (ins i16imm_pcrel:$dst),
"call{w}\t$dst", []>, OpSize16,
Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
"call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst),
"call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT; use with caution.
let isCodeGenOnly = 1 in {
def CALL16r_NT : I<0xFF, MRM2r, (outs), (ins GR16 : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall GR16 : $dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL16m_NT : I<0xFF, MRM2m, (outs), (ins i16mem : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall(loadi16 addr : $dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
def CALL32r_NT : I<0xFF, MRM2r, (outs), (ins GR32 : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall GR32 : $dst)]>,
OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL32m_NT : I<0xFF, MRM2m, (outs), (ins i32mem : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall(loadi32 addr : $dst))]>,
OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
/*
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, SSP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TCRETURNri : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs),
(ins i32mem_TC:$dst, i32imm:$offset), []>;
// FIXME: These should be pseudo instructions that are lowered when going to
// MCInst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst), "jmp\t$dst", []>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
"jmp{l}\t{*}$dst", []>;
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, EFLAGS, SSP] in {
def TCRETURNdicc : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/
//===----------------------------------------------------------------------===//
// Call Instructions...
//
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", []>, OpSize32,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseRetpoline]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseRetpoline]>;
// Non-tracking calls for IBT; use with caution.
let isCodeGenOnly = 1 in {
def CALL64r_NT : I<0xFF, MRM2r, (outs), (ins GR64 : $dst),
"call{q}\t{*}$dst",[(X86NoTrackCall GR64 : $dst)]>,
Requires<[In64BitMode]>, NOTRACK;
def CALL64m_NT : I<0xFF, MRM2m, (outs), (ins i64mem : $dst),
"call{q}\t{*}$dst",
[(X86NoTrackCall(loadi64 addr : $dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall]>, NOTRACK;
}
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{q}\t{*}$dst", []>;
}
/*
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
def TCRETURNri64 : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
"jmp\t$dst", []>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"jmp{q}\t{*}$dst", []>;
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
let hasREX_WPrefix = 1 in {
def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
}
}
let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
Requires<[Not64BitMode,UseRetpoline]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
Requires<[In64BitMode,UseRetpoline]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def RETPOLINE_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
def RETPOLINE_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [RSP, EFLAGS, SSP] in {
def TCRETURNdi64cc : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset,
i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/

View File

@@ -0,0 +1,204 @@
//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
//
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
let Defs = [AX], Uses = [AL] in // AX = signext(AL)
def CBW : I<0x98, RawFrm, (outs), (ins),
"{cbtw|cbw}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX], Uses = [AX] in // EAX = signext(AX)
def CWDE : I<0x98, RawFrm, (outs), (ins),
"{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX)
def CWD : I<0x99, RawFrm, (outs), (ins),
"{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX)
def CDQ : I<0x99, RawFrm, (outs), (ins),
"{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
def CDQE : RI<0x98, RawFrm, (outs), (ins),
"{cltq|cdqe}", []>, Sched<[WriteALU]>;
let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
def CQO : RI<0x99, RawFrm, (outs), (ins),
"{cqto|cqo}", []>, Sched<[WriteALU]>;
}
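// Worked example (illustrative, not from the original file): if AL = 0x80
// (-128), CBW sign-extends it so that AX = 0xFF80; CWD then replicates AX's
// sign bit so that DX:AX = 0xFFFF:0xFF80 -- the dividend form IDIV expects.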
// Sign/Zero extenders
let hasSideEffects = 0 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>,
OpSize32, TB, Sched<[WriteALULd]>;
let hasSideEffects = 0 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))]>,
TB, OpSize32, Sched<[WriteALULd]>;
// These instructions exist as a consequence of the operand size prefix having
// control of the destination size, but not the input size. Only support them
// for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
let mayLoad = 1 in {
def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable;
def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable;
} // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32rr8_NOREX : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
// operand that can never access an h register. If support for h registers
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR8:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR16:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR32:$src))]>,
Sched<[WriteALU]>, Requires<[In64BitMode]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
}
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
def : Pat<(i64 (zext GR8:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(zextloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(i64 (zext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def : Pat<(zextloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
// SUBREG_TO_REG to utilize implicit zero-extension; however, this isn't possible
// when the 32-bit value is defined by a truncate or is copied from something
// where the high bits aren't necessarily all zero. In such cases, we fall back
// to these explicit zext instructions.
def : Pat<(i64 (zext GR32:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
def : Pat<(i64 (zextloadi64i32 addr:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
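// For reference (an architectural fact, not stated here): in 64-bit mode any
// instruction that writes a 32-bit register implicitly zeroes bits 63:32, so
//   movl %esi, %eax
// already yields the zero-extended 64-bit value; SUBREG_TO_REG merely records
// that the upper half is known to be zero.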

View File

@@ -0,0 +1,636 @@
//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes FMA (Fused Multiply-Add) instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
// For all FMA opcodes declared in fma3p_rm_* and fma3s_rm_* multiclasses
// defined below, both the register and memory variants are commutable.
// For the register form the commutable operands are 1, 2 and 3.
// For the memory variant the folded operand must be in 3. Thus,
// in that case, only the operands 1 and 2 can be swapped.
// Commuting some of the operands may require an opcode change.
// FMA*213*:
// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
// operands 1 and 3 (register forms only): *213* --> *231*;
// operands 2 and 3 (register forms only): *213* --> *132*.
// FMA*132*:
// operands 1 and 2 (memory & register forms): *132* --> *231*;
// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
// operands 2 and 3 (register forms only): *132* --> *213*.
// FMA*231*:
// operands 1 and 2 (memory & register forms): *231* --> *132*;
// operands 1 and 3 (register forms only): *231* --> *213*;
// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
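// Worked example (illustrative, not from the original file): *213* computes
// dst = src2 * src1 + src3. After swapping operands 1 and 3, the same value is
// src2 * src3 + src1 in the new numbering, which is exactly the *231* form --
// hence the 213 --> 231 opcode change noted above.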
multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// The pattern is in 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256,
X86SchedWriteWidths sched> {
defm NAME#213#Suff : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#231#Suff : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#132#Suff : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#213#Suff#Y : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#231#Suff#Y : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#132#Suff#Y : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
}
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32,
SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsub, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmaddsub,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsubadd,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmadd, v4f32, v8f32, SchedWriteFMA>;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmsub, v4f32, v8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// All source register operands of FMA opcodes defined in the fma3s_rm
// multiclasses can be commuted. In many cases such a commute transformation
// requires an opcode adjustment; for example, commuting operands 1 and 2 in
// the FMA*132 form would require an opcode change to FMA*231:
// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
// -->
// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
// Please see more detailed comment at the very beginning of the section
// defining FMA3 opcodes above.
multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// Pattern is in 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
X86MemOperand x86memop, X86FoldableSchedWrite sched> {
defm NAME#213#Suff : fma3s_rm_213<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#231#Suff : fma3s_rm_231<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#132#Suff : fma3s_rm_132<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpNode, sched>;
}
// These FMA*_Int instructions are defined specially for use when
// the scalar FMA intrinsics are lowered to machine instructions, and in that
// sense, they are similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc.
// instructions.
//
// All of the FMA*_Int opcodes are defined as commutable here.
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
// and the corresponding optimizations have been developed.
// Commuting the 1st operand of FMA*_Int requires some additional analysis;
// the commute optimization is legal only if all users of FMA*_Int use only
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may not be implemented yet, we allow the routines doing the actual commute
// transformation to decide if one or another instruction is commutable or not.
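// A hedged illustration of why commuting operand 1 needs analysis: the scalar
// VFMADD213SSr_Int form computes dst[31:0] = src2[31:0] * src1[31:0] +
// src3[31:0] and passes dst[127:32] through from operand 1. Moving a
// different register into operand 1 changes which upper elements survive, so
// the commute is only safe when every user reads just the lowest element.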
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
// operands of the FMA 213 form.
// The FMA 231 form can be obtained only by commuting the 1st operand of the
// 213 or 132 forms, which is possible only after special analysis of all uses
// of the initial instruction. Such analysis does not exist yet, and thus the
// 231 form of FMA*_Int instructions is introduced using an optimistic
// assumption that such analysis will be implemented eventually.
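// A hedged worked example of the trivial 2nd/3rd commute: the 213 form
// computes dst = src2 * src1 + src3; after exchanging the registers in
// operands 2 and 3, the same value is src1 * src3 + src2 under the new
// numbering, which is exactly the 132 form, so only the opcode changes.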
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
RegisterClass RC, Operand memop,
X86FoldableSchedWrite sched> {
defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
memop, RC, sched>;
defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, RC, sched>;
defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
memop, RC, sched>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
FR32, f32mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS",
VR128, ssmem, sched>;
let ExeDomain = SSEPackedDouble in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
FR64, f64mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
VR128, sdmem, sched>, VEX_W;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub,
SchedWriteFMA.Scl>, VEX_LIG;
multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
SDNode Move, ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA, NoAVX512] in {
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
RC:$src3))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, RC:$src3,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3), RC:$src2))))),
(!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, (mem_frag addr:$src3),
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
}
}
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
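// A hedged note on the forms below: unlike FMA3, FMA4 has a destination
// distinct from its sources, so a memory operand can be folded into either
// multiplicand position -- the "rm" defs fold $src3 (MRMSrcMemOp4, with
// VEX_W) while the "mr" defs fold $src2 (MRMSrcMem, without VEX_W) -- giving
// two memory forms per opcode.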
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : FMA4S<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched]>;
def rm : FMA4S<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_LIG, FoldGenData<NAME#rr>, Sched<[sched]>;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
ReadAfterLd]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_LIG, FoldGenData<NAME#rr_Int>, Sched<[sched]>;
} // isCodeGenOnly = 1
}
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,
X86SchedWriteWidths sched> {
let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
VEX_W, Sched<[sched.XMM]>;
def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
Sched<[sched.XMM.Folded, ReadAfterLd,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
ReadAfterLd]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
VEX_W, VEX_L, Sched<[sched.YMM]>;
def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
Sched<[sched.XMM]>, FoldGenData<NAME#rr>;
def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_L, Sched<[sched.YMM]>, FoldGenData<NAME#Yrr>;
} // isCodeGenOnly = 1
}
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
}
multiclass scalar_fma4_patterns<SDNode Op, string Name,
ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
(!cast<Instruction>(Name#"rr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2,
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Name#"rm_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)), addr:$src3)>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, (mem_frag addr:$src2),
RC:$src3))))),
(!cast<Instruction>(Name#"mr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)), addr:$src2,
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
}
}
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;


@@ -0,0 +1,748 @@
//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
[SDNPHasChain, SDNPInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//
def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
def fpimmneg0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-0.0);
}]>;
def fpimm1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+1.0);
}]>;
def fpimmneg1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-1.0);
}]>;
/*
// Some 'special' instructions - expanded after instruction selection.
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
*/
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32",
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
// In some cases there are additional variants with a mixture of different
// register sizes.
// The fourth instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
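// A concrete pairing from this file, as an illustration: "defm ADD :
// FPBinary_rr<fadd>" below yields the ADD_Fp32/ADD_Fp64/ADD_Fp80 pseudos used
// by instruction selection, while ADD_FST0r / ADD_FrST0 / ADD_FPrST0 are the
// matching real FPI "fadd"/"faddp" register forms on RST registers that the
// FP stackifier substitutes in after register allocation.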
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
// Register op register -> register
// These are separated out because they have no reversed form.
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
[(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
[(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
[(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
}
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2))),
(set RFP32:$dst,
(OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2))),
(set RFP64:$dst,
(OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
(set RFP64:$dst,
(OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
} // mayLoad = 1, hasSideEffects = 1
}
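// A hedged illustration of the Forward bit: "defm SUBR: FPBinary<fsub, MRM5m,
// "subr", 0>" below produces, e.g., SUBR_Fp32m whose pattern is
// (set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1)) -- the loaded
// operand becomes the minuend, matching the reversed "fsubr" semantics.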
let Defs = [FPSW] in {
// FPBinary_rr just defines pseudo-instructions, so there is no need to set
// scheduling resources.
let hasNoSchedulingInfo = 1 in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
defm DIV : FPBinary_rr<fdiv>;
}
// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<fadd, MRM0m, "add">;
defm SUB : FPBinary<fsub, MRM4m, "sub">;
defm SUBR: FPBinary<fsub, MRM5m, "subr", 0>;
}
let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
defm DIV : FPBinary<fdiv, MRM6m, "div">;
defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
}
} // Defs = [FPSW]
class FPST0rInst<Format fp, string asm>
: FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
class FPrST0Inst<Format fp, string asm>
: FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
class FPrST0PInst<Format fp, string asm>
: FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
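// As a reading aid for the asm strings below (describing the existing
// convention, not new behavior): "{a|b}" selects the dialect, AT&T on the
// left and Intel on the right, and a braced group without '|' prints only in
// AT&T syntax. So "fsub{r}\t{%st(0), $op|$op, st(0)}" prints as
// "fsubr %st(0), %st(i)" for AT&T but "fsub st(i), st(0)" for Intel, which is
// how the mnemonic swap described above is implemented.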
let SchedRW = [WriteFAdd] in {
def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t$op">;
def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t$op">;
def SUBR_FST0r : FPST0rInst <MRM5r, "fsubr\t$op">;
def SUB_FrST0 : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
} // SchedRW
let SchedRW = [WriteFCom] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
let SchedRW = [WriteFMul] in {
def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t$op">;
def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t$op">;
} // SchedRW
let SchedRW = [WriteFDiv] in {
def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t$op">;
def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
def DIV_FST0r : FPST0rInst <MRM6r, "fdiv\t$op">;
def DIVR_FrST0 : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
} // SchedRW
// Unary operations.
multiclass FPUnary<SDNode OpNode, Format fp, string asmstring> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src))]>;
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}
let SchedRW = [WriteFSqrt80] in
defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
let SchedRW = [WriteMicrocoded] in {
defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}
let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
} // hasSideEffects
def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Defs = [FPSW]
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
let SchedRW = [WriteFComLd] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW
let SchedRW = [WriteMicrocoded] in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\ttbyte ptr $src">;
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\ttbyte ptr $dst">;
} // SchedRW
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
CondMovFP,
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
cc, EFLAGS))]>;
def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
CondMovFP,
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
cc, EFLAGS))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
CondMovFP,
[(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
cc, EFLAGS))]>,
Requires<[HasCMov]>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFCMOV] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
defm CMOVP : FPCMov<X86_COND_P>;
defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
"fcmovb\t{$op, %st(0)|st(0), $op}">;
def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
"fcmovbe\t{$op, %st(0)|st(0), $op}">;
def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
"fcmove\t{$op, %st(0)|st(0), $op}">;
def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
"fcmovu\t{$op, %st(0)|st(0), $op}">;
def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
"fcmovnb\t{$op, %st(0)|st(0), $op}">;
def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
"fcmovnbe\t{$op, %st(0)|st(0), $op}">;
def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
"fcmovne\t{$op, %st(0)|st(0), $op}">;
def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
"fcmovnu\t{$op, %st(0)|st(0), $op}">;
} // Predicates = [HasCMov]
} // SchedRW
// Floating point loads & stores.
let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
[(set RFP80:$dst, (loadf80 addr:$src))]>;
} // canFoldAsLoad
def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i64))]>;
} // SchedRW
let SchedRW = [WriteStore] in {
def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
[(store RFP32:$src, addr:$op)]>;
def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
[(truncstoref32 RFP64:$src, addr:$op)]>;
def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
[(store RFP64:$src, addr:$op)]>;
def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
[(truncstoref32 RFP80:$src, addr:$op)]>;
def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
[(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.
let mayStore = 1, hasSideEffects = 0 in {
def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
[(store RFP80:$src, addr:$op)]>;
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
} // SchedRW
let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's an FPStack op.
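// Hedged illustration: FISTTP always truncates toward zero regardless of the
// rounding control in the FPU control word, so a C-style cast no longer needs
// the classic save/modify/restore sequence, roughly:
//   fnstcw  (%rsp)          ; save control word
//   orw     $0xC00, (%rsp)  ; force RC = truncate
//   fldcw   (%rsp)
//   fistpll (%rax)
//   fldcw   <saved>         ; restore
// With SSE3, a single "fisttpll (%rax)" suffices.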
let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
}
// FP Stack manipulation instructions.
let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
}
// Floating point constant loads.
let isReMaterializable = 1, SchedRW = [WriteZero] in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm1)]>;
def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm0)]>;
def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm1)]>;
def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm0)]>;
def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
let SchedRW = [WriteFLD0] in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
let SchedRW = [WriteFLD1] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
let SchedRW = [WriteFLDC], Defs = [FPSW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
def FLDLG2 : I<0xD9, MRM_EC, (outs), (ins), "fldlg2", []>;
def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
} // SchedRW
} // Defs = [FPSW]
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucom\t$reg">;
def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucomp\t$reg">;
def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) with ST(1), pop, pop
(outs), (ins), "fucompp">;
}
let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucomi\t$reg">;
def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucompi\t$reg">;
}
let Defs = [EFLAGS, FPSW] in {
def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
}
} // SchedRW
// Floating point flag ops.
let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
let Defs = [FPSW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)]>;
} // SchedRW
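// Hedged note: X86fp_stsw / FNSTSW16r mainly supports the classic pre-FCOMI
// compare idiom -- roughly "fucompp; fnstsw %ax; sahf" -- which copies the
// x87 condition bits into EFLAGS via AH.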
let Defs = [FPSW], mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
Sched<[WriteLoad]>;
// FPU control instructions
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
def FPNCEST0r : FPI<0xD9, MRM3r, (outs RST:$op), (ins),
"fstpnce\t{%st(0), $op|$op, st(0)}">;
def FENI8087_NOP : I<0xDB, MRM_E0, (outs), (ins), "feni8087_nop", []>;
def FDISI8087_NOP : I<0xDB, MRM_E1, (outs), (ins), "fdisi8087_nop", []>;
// Clear exceptions
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
} // Defs = [FPSW]
} // SchedRW
// Operand-less floating-point instructions for the disassembler.
def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", []>, Sched<[WriteNop]>;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", []>;
def F2XM1 : I<0xD9, MRM_F0, (outs), (ins), "f2xm1", []>;
def FYL2X : I<0xD9, MRM_F1, (outs), (ins), "fyl2x", []>;
def FPTAN : I<0xD9, MRM_F2, (outs), (ins), "fptan", []>;
def FPATAN : I<0xD9, MRM_F3, (outs), (ins), "fpatan", []>;
def FXTRACT : I<0xD9, MRM_F4, (outs), (ins), "fxtract", []>;
def FPREM1 : I<0xD9, MRM_F5, (outs), (ins), "fprem1", []>;
def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
def FPREM : I<0xD9, MRM_F8, (outs), (ins), "fprem", []>;
def FYL2XP1 : I<0xD9, MRM_F9, (outs), (ins), "fyl2xp1", []>;
def FSINCOS : I<0xD9, MRM_FB, (outs), (ins), "fsincos", []>;
def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", []>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", []>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
} // Defs = [FPSW]
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// Required for RET of f32 / f64 / f80 values.
def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
// Required for CALLs which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
RFP80:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
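// Hedged illustration: materializing an f80 -1.0 therefore selects
// (CHS_Fp80 (LD_Fp180)), which the FP stackifier ultimately emits as the real
// instruction pair "fld1; fchs".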
// Used to convert i64 to f64 since there isn't an SSE version.
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;


@@ -0,0 +1,993 @@
//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//
// Format specifies the encoding used by the instruction. This is part of the
// ad-hoc solution used to emit machine instruction encodings by our machine
// code emitter.
class Format<bits<7> val> {
bits<7> Value = val;
}
def Pseudo : Format<0>;
def RawFrm : Format<1>;
def AddRegFrm : Format<2>;
def RawFrmMemOffs : Format<3>;
def RawFrmSrc : Format<4>;
def RawFrmDst : Format<5>;
def RawFrmDstSrc : Format<6>;
def RawFrmImm8 : Format<7>;
def RawFrmImm16 : Format<8>;
def MRMDestMem : Format<32>;
def MRMSrcMem : Format<33>;
def MRMSrcMem4VOp3 : Format<34>;
def MRMSrcMemOp4 : Format<35>;
def MRMXm : Format<39>;
def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
def MRM6m : Format<46>; def MRM7m : Format<47>;
def MRMDestReg : Format<48>;
def MRMSrcReg : Format<49>;
def MRMSrcReg4VOp3 : Format<50>;
def MRMSrcRegOp4 : Format<51>;
def MRMXr : Format<55>;
def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
def MRM6r : Format<62>; def MRM7r : Format<63>;
def MRM_C0 : Format<64>; def MRM_C1 : Format<65>; def MRM_C2 : Format<66>;
def MRM_C3 : Format<67>; def MRM_C4 : Format<68>; def MRM_C5 : Format<69>;
def MRM_C6 : Format<70>; def MRM_C7 : Format<71>; def MRM_C8 : Format<72>;
def MRM_C9 : Format<73>; def MRM_CA : Format<74>; def MRM_CB : Format<75>;
def MRM_CC : Format<76>; def MRM_CD : Format<77>; def MRM_CE : Format<78>;
def MRM_CF : Format<79>; def MRM_D0 : Format<80>; def MRM_D1 : Format<81>;
def MRM_D2 : Format<82>; def MRM_D3 : Format<83>; def MRM_D4 : Format<84>;
def MRM_D5 : Format<85>; def MRM_D6 : Format<86>; def MRM_D7 : Format<87>;
def MRM_D8 : Format<88>; def MRM_D9 : Format<89>; def MRM_DA : Format<90>;
def MRM_DB : Format<91>; def MRM_DC : Format<92>; def MRM_DD : Format<93>;
def MRM_DE : Format<94>; def MRM_DF : Format<95>; def MRM_E0 : Format<96>;
def MRM_E1 : Format<97>; def MRM_E2 : Format<98>; def MRM_E3 : Format<99>;
def MRM_E4 : Format<100>; def MRM_E5 : Format<101>; def MRM_E6 : Format<102>;
def MRM_E7 : Format<103>; def MRM_E8 : Format<104>; def MRM_E9 : Format<105>;
def MRM_EA : Format<106>; def MRM_EB : Format<107>; def MRM_EC : Format<108>;
def MRM_ED : Format<109>; def MRM_EE : Format<110>; def MRM_EF : Format<111>;
def MRM_F0 : Format<112>; def MRM_F1 : Format<113>; def MRM_F2 : Format<114>;
def MRM_F3 : Format<115>; def MRM_F4 : Format<116>; def MRM_F5 : Format<117>;
def MRM_F6 : Format<118>; def MRM_F7 : Format<119>; def MRM_F8 : Format<120>;
def MRM_F9 : Format<121>; def MRM_FA : Format<122>; def MRM_FB : Format<123>;
def MRM_FC : Format<124>; def MRM_FD : Format<125>; def MRM_FE : Format<126>;
def MRM_FF : Format<127>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
class ImmType<bits<4> val> {
bits<4> Value = val;
}
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
def Imm8Reg : ImmType<3>; // Register encoded in [7:4].
def Imm16 : ImmType<4>;
def Imm16PCRel : ImmType<5>;
def Imm32 : ImmType<6>;
def Imm32PCRel : ImmType<7>;
def Imm32S : ImmType<8>;
def Imm64 : ImmType<9>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
class FPFormat<bits<3> val> {
bits<3> Value = val;
}
def NotFP : FPFormat<0>;
def ZeroArgFP : FPFormat<1>;
def OneArgFP : FPFormat<2>;
def OneArgFPRW : FPFormat<3>;
def TwoArgFP : FPFormat<4>;
def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
// Keep in sync with tables in X86InstrInfo.cpp.
class Domain<bits<2> val> {
bits<2> Value = val;
}
def GenericDomain : Domain<0>;
def SSEPackedSingle : Domain<1>;
def SSEPackedDouble : Domain<2>;
def SSEPackedInt : Domain<3>;
// Class specifying the vector form of the decompressed
// 8-bit displacement (CDisp8).
class CD8VForm<bits<3> val> {
bits<3> Value = val;
}
def CD8VF : CD8VForm<0>; // v := VL
def CD8VH : CD8VForm<1>; // v := VL/2
def CD8VQ : CD8VForm<2>; // v := VL/4
def CD8VO : CD8VForm<3>; // v := VL/8
// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
def CD8VT8 : CD8VForm<7>; // v := 8
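// Worked example (see the CD8_Scale computation in X86Inst below): for a
// 512-bit vector, VL is 64 bytes, so CD8VF scales disp8 by 64, CD8VH by 32,
// CD8VQ by 16 and CD8VO by 8; the tuple forms instead scale by 1, 2, 4 or 8
// elements, i.e. by a multiple of CD8_EltSize.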
// Class specifying the prefix used as an opcode extension.
class Prefix<bits<3> val> {
bits<3> Value = val;
}
def NoPrfx : Prefix<0>;
def PD : Prefix<1>;
def XS : Prefix<2>;
def XD : Prefix<3>;
def PS : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
// that other instructions with this opcode use PD/XS/XD
// and if any of those is not supported they shouldn't
// decode to this instruction. e.g. ANDSS/ANDSD don't
// exist, but the 0xf2/0xf3 encoding shouldn't
// decode to ANDPS.
// Class specifying the opcode map.
class Map<bits<3> val> {
bits<3> Value = val;
}
def OB : Map<0>;
def TB : Map<1>;
def T8 : Map<2>;
def TA : Map<3>;
def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
// Class specifying the encoding.
class Encoding<bits<2> val> {
bits<2> Value = val;
}
def EncNormal : Encoding<0>;
def EncVEX : Encoding<1>;
def EncXOP : Encoding<2>;
def EncEVEX : Encoding<3>;
// Operand size for encodings that change based on mode.
class OperandSize<bits<2> val> {
bits<2> Value = val;
}
def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
// Address size for encodings that change based on mode.
class AddressSize<bits<2> val> {
bits<2> Value = val;
}
def AdSizeX : AddressSize<0>; // Address size determined using addr operand.
def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize16 { OperandSize OpSize = OpSize16; }
class OpSize32 { OperandSize OpSize = OpSize32; }
class AdSize16 { AddressSize AdSize = AdSize16; }
class AdSize32 { AddressSize AdSize = AdSize32; }
class AdSize64 { AddressSize AdSize = AdSize64; }
class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
class T8 { Map OpMap = T8; }
class TA { Map OpMap = TA; }
class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
class XD : TB { Prefix OpPrefix = XD; }
class XS : TB { Prefix OpPrefix = XS; }
class T8PS : T8 { Prefix OpPrefix = PS; }
class T8PD : T8 { Prefix OpPrefix = PD; }
class T8XD : T8 { Prefix OpPrefix = XD; }
class T8XS : T8 { Prefix OpPrefix = XS; }
class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bits<2> VEX_WPrefix = 1; }
class VEX_WIG { bits<2> VEX_WPrefix = 2; }
// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
// FIXME: We should consider adding separate bits for VEX_WIG and the extra
// part of W1X. This would probably simplify the tablegen emitters and
// the TSFlags creation below.
class VEX_W1X { bits<2> VEX_WPrefix = 3; }
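// Summary of the VEX_WPrefix encodings defined above: 0 = W0 (the default in
// X86Inst below), 1 = VEX_W (W1), 2 = VEX_WIG (W ignored), 3 = VEX_W1X (W1
// that may be relaxed to W0 by EVEX2VEX).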
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
class NOTRACK { bit hasNoTrackPrefix = 1; }
// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
class EVEX_CD8<int esize, CD8VForm form> {
int CD8_EltSize = !srl(esize, 3);
bits<3> CD8_Form = form.Value;
}
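// Illustrative sketch only (no such instruction is defined in this file):
// an AVX-512 class mixes EVEX_CD8 in alongside its other adjectives, e.g.
//   class ExampleCD8I<bits<8> o, Format F, dag outs, dag ins, string asm>
//     : AVX512PDI<o, F, outs, ins, asm, []>, EVEX_CD8<32, CD8VF>;
// giving CD8_EltSize = 4 (bytes) and CD8_Form = CD8VF for the disp8 scaling
// computed in X86Inst below.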
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
// Specify the alternative register form instruction to replace the current
// instruction in case it was picked during generation of memory folding tables
class FoldGenData<string _RegisterForm> {
string FoldGenRegForm = _RegisterForm;
}
// Provide a specific instruction to be used by the EVEX2VEX conversion.
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
// Mark the instruction as "illegal to memory fold/unfold"
class NotMemoryFoldable { bit isMemoryFoldable = 0; }
// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
bits<8> Opcode = opcod;
Format Form = f;
bits<7> FormBits = Form.Value;
ImmType ImmT = i;
dag OutOperandList = outs;
dag InOperandList = ins;
string AsmString = AsmStr;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
//
// Attributes specific to X86 instructions...
//
bit ForceDisassemble = 0; // Force instruction to disassemble even though it's
// isCodeGenOnly. Needed to hide an ambiguous
// AsmString from the parser, but still disassemble.
OperandSize OpSize = OpSizeFixed; // Does this instruction's encoding change
// based on operand size of the mode?
bits<2> OpSizeBits = OpSize.Value;
AddressSize AdSize = AdSizeX; // Does this instruction's encoding change
// based on address size of the mode?
bits<2> AdSizeBits = AdSize.Value;
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
bits<3> OpMapBits = OpMap.Value;
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
Domain ExeDomain = d;
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit?
bit hasEVEX_K = 0; // Does this inst require masking?
bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field?
bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field?
bit hasEVEX_B = 0; // Does this inst set the EVEX_B field?
bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width.
// Declare it int rather than bits<4> so that all bits are defined when
// assigning to bits<7>.
int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes.
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
bit hasNoTrackPrefix = 0; // Does this inst have a 0x3E (NoTrack) prefix?
bits<2> EVEX_LL;
let EVEX_LL{0} = hasVEX_L;
let EVEX_LL{1} = hasEVEX_L2;
// Vector size in bytes.
bits<7> VectSize = !shl(16, EVEX_LL);
// The scaling factor for AVX512's compressed displacement is either
// - the size of a power-of-two number of elements or
// - the size of a single element for broadcasts or
// - the total vector size divided by a power-of-two number.
// Possible values are: 0 (non-AVX512 inst), 1, 2, 4, 8, 16, 32 and 64.
bits<7> CD8_Scale = !if (!eq (OpEnc.Value, EncEVEX.Value),
!if (CD8_Form{2},
!shl(CD8_EltSize, CD8_Form{1-0}),
!if (hasEVEX_B,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
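// Worked examples of the expression above (EVEX encodings only):
// - a full 512-bit form (CD8VF, hasEVEX_L2): VectSize = 64, scale = 64 >> 0 = 64
// - a broadcast form (hasEVEX_B) with 32-bit elements: scale = CD8_EltSize = 4
// - a tuple form CD8VT4 with 32-bit elements: scale = 4 << 2 = 16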
// Used in the memory folding generation (TableGen backend) to point to an alternative
// instruction to replace the current one in case it got picked during generation.
string FoldGenRegForm = ?;
// Used to prevent an explicit EVEX2VEX override for this instruction.
string EVEX2VEXOverride = ?;
bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
let TSFlags{10-9} = AdSizeBits;
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
let TSFlags{15-13} = OpMapBits;
let TSFlags{16} = hasREX_WPrefix;
let TSFlags{20-17} = ImmT.Value;
let TSFlags{23-21} = FPForm.Value;
let TSFlags{24} = hasLockPrefix;
let TSFlags{25} = hasREPPrefix;
let TSFlags{27-26} = ExeDomain.Value;
let TSFlags{29-28} = OpEncBits;
let TSFlags{37-30} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
let TSFlags{38} = VEX_WPrefix{0};
let TSFlags{39} = hasVEX_4V;
let TSFlags{40} = hasVEX_L;
let TSFlags{41} = hasEVEX_K;
let TSFlags{42} = hasEVEX_Z;
let TSFlags{43} = hasEVEX_L2;
let TSFlags{44} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
let TSFlags{51-45} = CD8_Scale;
let TSFlags{52} = hasEVEX_RC;
let TSFlags{53} = hasNoTrackPrefix;
}
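// Worked example of the TSFlags packing above, for a hypothetical RawFrm
// instruction with opcode 0x90 and no prefixes: TSFlags{6-0} = 1 (RawFrm),
// TSFlags{37-30} = 0x90, and every other field keeps its zero default.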
class PseudoI<dag oops, dag iops, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32S, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm64, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: PseudoI<outs, ins, pattern> {
let FPForm = fp;
}
// Templates for instructions that use a 16- or 32-bit segmented address as
// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
//
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
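// For example, following the !if chain above: a normally-encoded SI with the
// XS prefix gets Predicates = [UseSSE1], the same instruction in VEX form
// gets [UseAVX], and in EVEX form it gets [HasAVX512].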
// SI_Int - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
[UseSSE2])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// PI - SSE 1 & 2 packed instructions
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// MMXPI - SSE 1 & 2 packed instructions with MMX operands
class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
[HasMMX, HasSSE1]);
}
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SSE1 Instruction Templates:
//
// SSI - SSE1 instructions with XS prefix.
// PSI - SSE1 instructions with PS prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
// VSSI - SSE1 instructions with XS prefix in AVX form.
// VPSI - SSE1 instructions with PS prefix in AVX form, packed single.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
// S2SI - SSE2 instructions with XS prefix.
// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with PD prefix, packed double domain.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
// VPDI - SSE2 vector instructions with PD prefix in AVX form,
// packed double domain.
// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
// S2I - SSE2 scalar instructions with PD prefix.
// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
// MMX operands.
// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
PD, Requires<[HasAVX]>;
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
// SSE3 Instruction Templates:
//
// S3I - SSE3 instructions with PD prefixes.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
// uses the MMX registers. The 64-bit versions are grouped with the MMX
// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
Requires<[HasMMX, HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
Requires<[HasMMX, HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
// SS48I - SSE 4.1 instructions with T8 prefix.
// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
// SS42AI - SSE 4.2 instructions with TA prefix.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
// AVX8I - AVX instructions with T8PD prefix.
// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX]>;
// AVX2 Instruction Templates:
// Instructions introduced in AVX2 (no SSE equivalent forms)
//
// AVX28I - AVX2 instructions with T8PD prefix.
// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX2]>;
// AVX-512 Instruction Templates:
// Instructions introduced in AVX-512 (no SSE equivalent forms)
//
// AVX5128I - AVX-512 instructions with T8PD prefix.
// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
// AVX512PDI - AVX-512 instructions with PD, double packed.
// AVX512PSI - AVX-512 instructions with PS, single packed.
// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
// AVX512XSI - AVX-512 instructions with XS prefix, generic domain.
// AVX512BI - AVX-512 instructions with PD, int packed domain.
// AVX512SI - AVX-512 scalar instructions with PD prefix.
class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX512]>;
class AVX5128IBase : T8PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
Requires<[HasAVX512]>;
class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS,
Requires<[HasAVX512]>;
class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
Requires<[HasAVX512]>;
class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIBase : PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XSIi8Base : XS {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XDIi8Base : XD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
}
class AVX512PDIi8Base : PD {
Domain ExeDomain = SSEPackedDouble;
ImmType ImmT = Imm8;
}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[HasAVX512]>;
class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
class AVX512FMA3Base : T8PD, EVEX_4V;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;
// AES Instruction Templates:
//
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[NoAVX, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[NoAVX, HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP9, Requires<[HasXOP]>;
// XOP 2 and 3 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 4 Operand Instruction Templates with imm byte
class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
VEX_4V, Requires<[HasXOP]>;
// X86-64 Instruction templates...
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, REX_W;
class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii16<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii64<o, F, outs, ins, asm, pattern>, REX_W;
class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: S2I<o, F, outs, ins, asm, pattern>, REX_W;
class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: VS2I<o, F, outs, ins, asm, pattern>, VEX_W;
// MMX Instruction templates
//
// MMXI - MMX instructions with TB prefix.
// MMXI32 - MMX instructions with TB prefix valid only in 32-bit mode.
// MMXI64 - MMX instructions with TB prefix valid only in 64-bit mode.
// MMX2I - MMX / SSE2 instructions with PD prefix.
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,612 @@
//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
// All instructions that use MMX should be in this file, even if they also use
// SSE.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// MMX Multiclasses
//===----------------------------------------------------------------------===//
// Alias instruction that maps zero vector to pxor mmx.
// This is expanded by ExpandPostRAPseudos to a pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
X86FoldableSchedWrite sched, bit Commutable = 0,
X86MemOperand OType = i64mem> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2, X86FoldableSchedWrite sched,
X86FoldableSchedWrite schedImm> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
Sched<[schedImm]>;
}
}
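// Reading MMXI_binop_rmi_int above: the rr/rm forms take the shift amount
// from an MMX register or memory (IntId), while the ri form takes an
// immediate (IntId2); e.g. MMX_PSRLW below pairs int_x86_mmx_psrl_w with
// int_x86_mmx_psrli_w.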
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched> {
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched,
bit Commutable = 0> {
let isCommutable = Commutable in
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
}
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
X86FoldableSchedWrite sched> {
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (Int SrcRC:$src))], d>,
Sched<[sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
Sched<[sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
(ins DstRC:$src1, SrcRC:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
Sched<[WriteCvtI2PS]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
Sched<[WriteCvtI2PS.Folded]>;
}
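// The 3addr variant reads its destination as $src1; the "$src1 = $dst" tie
// is supplied by the Constraints at the instantiation site (see
// MMX_CVTPI2PS below).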
//===----------------------------------------------------------------------===//
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
let SchedRW = [WriteEMMS] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
//===----------------------------------------------------------------------===//
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))]>,
Sched<[WriteVecMoveFromGpr]>;
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))]>,
Sched<[WriteVecLoad]>;
let Predicates = [HasMMX] in {
def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (MMX_X86movw2d (i32 0))),
(MMX_SET0)>;
def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
(MMX_MOVD64rm addr:$src)>;
}
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteVecStore]>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
(MMX_X86movd2w (x86mmx VR64:$src)))]>,
Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))]>,
Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}",
[]>, Sched<[SchedWriteVecMoveLS.MMX.RM]>;
let isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert VR64:$src))]>,
Sched<[WriteVecMoveToGpr]>;
let SchedRW = [WriteVecMove], hasSideEffects = 0, isMoveReg = 1 in {
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MMX_MOVQ64rr">;
} // SchedRW, hasSideEffects, isMoveReg
} // isBitcast
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
(outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.MMX.MR]>;
let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
} // SchedRW
let SchedRW = [SchedWriteVecMoveLS.MMX.MR] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64
(scalar_to_vector
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[]>;
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[]>;
}
} // SchedRW
let Predicates = [HasMMX, HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
// movd to MMX register zero-extends
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(MMX_MOVD64rm addr:$src)>;
}
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
SchedWriteVecALU.MMX>;
defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
SchedWriteVecALU.MMX>;
defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
SchedWriteVecALU.MMX>;
// -- Addition
defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
SchedWriteVecALU.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
SchedWritePHAdd.MMX>;
defm MMX_PHADDD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
SchedWritePHAdd.MMX>;
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
SchedWritePHAdd.MMX>;
// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
SchedWriteVecALU.MMX>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
SchedWriteVecALU.MMX>;
defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
SchedWritePHAdd.MMX>;
// -- Multiplication
defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
SchedWriteVecIMul.MMX, 1>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw,
SchedWriteVecIMul.MMX>;
let Predicates = [HasMMX, HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
SchedWritePSADBW.MMX, 1>;
}
defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
SchedWriteVecALU.MMX>;
defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
SchedWriteVecALU.MMX>;
defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
SchedWriteVecALU.MMX>;
let Constraints = "$src1 = $dst" in
defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b,
SchedWriteShuffle.MMX>;
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
SchedWriteVecLogic.MMX, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
SchedWriteVecLogic.MMX>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
SchedWriteVecALU.MMX>;
// -- Unpack Instructions
defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
int_x86_mmx_punpckhbw,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
int_x86_mmx_punpckhwd,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
int_x86_mmx_punpckhdq,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
int_x86_mmx_punpckldq,
SchedWriteShuffle.MMX,
0, i32mem>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
SchedWriteShuffle.MMX>;
defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
SchedWriteShuffle.MMX>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
SchedWriteShuffle.MMX>;
// -- Shuffle Instructions
defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
SchedWriteVarShuffle.MMX>;
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
WriteCvtI2PD, SSEPackedDouble>, PD;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
}
// Extract / Insert
let Predicates = [HasMMX, HasSSE1] in
def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
imm:$src2))]>,
Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasMMX, HasSSE1] in {
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteVecInsert]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
(ins VR64:$src1, i16mem:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}
// Mask creation
let Predicates = [HasMMX, HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>,
Sched<[WriteMMXMOVMSK]>;
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
(x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
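// The SDTypeProfile above reads: one result and one operand, with the result
// constrained to x86mmx and the operand to v2i64, modeling the move of the
// low 64 bits of an XMM value into an MMX register.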
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
let Uses = [RDI], Predicates = [HasMMX, HasSSE1, In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
}
// 64-bit bit convert.
let Predicates = [HasMMX, HasSSE2] in {
def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
(MMX_CVTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(MMX_CVTPD2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
(MMX_CVTTPD2PIirr VR128:$src)>;
}


@@ -0,0 +1,80 @@
//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MPX instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// FIXME: Investigate a better scheduler class once MPX is used inside LLVM.
let SchedRW = [WriteSystem] in {
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
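// For illustration, a rough hand-expansion of the defm above (a sketch only;
// names and field order are approximate):
//
//   def BNDMK32rm : I<0x1B, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
//                     "bndmk\t{$src, $dst|$dst, $src}", []>, XS,
//                     Requires<[HasMPX, Not64BitMode]>;
//   def BNDMK64rm : I<0x1B, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
//                     "bndmk\t{$src, $dst|$dst, $src}", []>, XS,
//                     Requires<[HasMPX, In64BitMode]>;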
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayLoad = 1 in {
def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
}
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayStore = 1 in {
def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
"bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
}
let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
"bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
} // SchedRW


@@ -0,0 +1,30 @@
//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel SGX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SGX instructions
let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
"encls", []>, TB;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
"enclu", []>, TB;
// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
"enclv", []>, TB;
} // SchedRW

File diff suppressed because it is too large


@@ -0,0 +1,63 @@
//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the AMD SVM instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SVM instructions
let SchedRW = [WriteSystem] in {
// 0F 01 D9
def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
// 0F 01 DC
def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
// 0F 01 DD
def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
// 0F 01 DE
let Uses = [EAX] in
def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB;
// 0F 01 D8
let Uses = [EAX] in
def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DA
let Uses = [EAX] in
def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DB
let Uses = [EAX] in
def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DF
let Uses = [EAX, ECX] in
def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%eax, %ecx|eax, ecx}", []>, TB, Requires<[Not64BitMode]>;
let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%rax, %ecx|rax, ecx}", []>, TB, Requires<[In64BitMode]>;
} // SchedRW

File diff suppressed because it is too large


@@ -0,0 +1,743 @@
//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
// privileged modes. These are not typically used by the compiler, but are
// supported for the assembler and disassembler.
//
//===----------------------------------------------------------------------===//
let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
// CPU flow control instructions
let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
def UD2 : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
def UD1 : I<0xB9, RawFrm, (outs), (ins), "ud1", []>, TB;
def UD0 : I<0xFF, RawFrm, (outs), (ins), "ud0", []>, TB;
}
def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
// Interrupt and SysCall Instructions.
let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>, Requires<[Not64BitMode]>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
} // SchedRW
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
def SYSRET64 : RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
def SYSEXIT64 : RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
def : Pat<(debugtrap),
(INT3)>, Requires<[NotPS4]>;
def : Pat<(debugtrap),
(INT (i8 0x41))>, Requires<[IsPS4]>;
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|al, dx}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins), "in{w}\t{%dx, %ax|ax, dx}", []>,
OpSize16;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", []>,
OpSize32;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
"in{b}\t{$port, %al|al, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{w}\t{$port, %ax|ax, $port}", []>, OpSize16;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{l}\t{$port, %eax|eax, $port}", []>, OpSize32;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins), "out{b}\t{%al, %dx|dx, al}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins), "out{w}\t{%ax, %dx|dx, ax}", []>,
OpSize16;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", []>,
OpSize32;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
"out{b}\t{%al, $port|$port, al}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{w}\t{%ax, $port|$port, ax}", []>, OpSize16;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{l}\t{%eax, $port|$port, eax}", []>, OpSize32;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
//let SchedRW = [WriteNop] in {
//def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
//def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
//def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
//def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
//def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
//def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
//} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from segment registers.
//
let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
// i16mem operand in LAR32rm and GR32 operand in LAR32rr are not typos.
let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
// i16mem operand in LAR64rm and GR32 operand in LAR64rr are not typos.
let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize16, NotMemoryFoldable;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize16, NotMemoryFoldable;
// i16mem operand in LSL32rm and GR32 operand in LSL32rr are not typos.
let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize32, NotMemoryFoldable;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize32, NotMemoryFoldable;
// i16mem operand in LSL64rm and GR32 operand in LSL64rr are not typos.
let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
"str{w}\t$dst", []>, TB, OpSize16;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
"str{l}\t$dst", []>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
"str{q}\t$dst", []>, TB;
let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", []>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), "push{l}\t{%cs|cs}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), "push{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), "push{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), "push{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), "push{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins), "push{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins), "push{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", []>,
OpSize16, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), "push{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", []>,
OpSize16, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), "push{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), "push{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", []>,
OpSize16, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins), "pop{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins), "pop{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", []>,
OpSize16, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins), "pop{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins), "pop{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lds{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lss{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"les{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"les{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lfs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lgs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lgs\t{$src, $dst|$dst, $src}", []>, TB;
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in {
def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SGDT32m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{l}\t$dst", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def SGDT64m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def SIDT16m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SIDT32m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{l}\t$dst", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def SIDT64m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
"sldt{w}\t$dst", []>, TB, OpSize16;
let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{w}\t$dst", []>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
"sldt{l}\t$dst", []>, OpSize32, TB;
// SLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
"sldt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LGDT32m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LGDT64m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LIDT64m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
} // SchedRW
//===----------------------------------------------------------------------===//
// Specialized register support
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", []>, OpSize16, TB;
def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
"smsw{l}\t$dst", []>, OpSize32, TB;
// no m form encodable; use SMSW16m
def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
"smsw{q}\t$dst", []>, TB;
// For memory operands, there is only a 16-bit form
def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
"smsw{w}\t$dst", []>, TB;
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB;
// WBNOINVD is like WBINVD, except without invalidation.
// Encoding: like WBINVD + a 0xF3 prefix.
def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd",
[(int_x86_wbnoinvd)]>, XS,
Requires<[HasWBNOINVD]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// CET instructions
// Use with caution; availability is not predicated on features.
let SchedRW = [WriteSystem] in {
let Uses = [SSP] in {
let Defs = [SSP] in {
def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
[(int_x86_incsspd GR32:$src)]>, XS;
def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src",
[(int_x86_incsspq GR64:$src)]>, XS;
} // Defs SSP
let Constraints = "$src = $dst" in {
def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"rdsspd\t$dst",
[(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS;
def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src),
"rdsspq\t$dst",
[(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS;
}
let Defs = [SSP] in {
def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp",
[(int_x86_saveprevssp)]>, XS;
def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src),
"rstorssp\t$src",
[(int_x86_rstorssp addr:$src)]>, XS;
} // Defs SSP
} // Uses SSP
def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrssd\t{$src, $dst|$dst, $src}",
[(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS;
def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrssq\t{$src, $dst|$dst, $src}",
[(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS;
def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrussd\t{$src, $dst|$dst, $src}",
[(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD;
def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrussq\t{$src, $dst|$dst, $src}",
[(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD;
let Defs = [SSP] in {
let Uses = [SSP] in {
def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy",
[(int_x86_setssbsy)]>, XS;
} // Uses SSP
def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src),
"clrssbsy\t$src",
[(int_x86_clrssbsy addr:$src)]>, XS;
} // Defs SSP
} // SchedRW
let SchedRW = [WriteSystem] in {
def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS;
def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS;
} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins),
"xsetbv",
[(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
} // HasXSAVE
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave\t$dst",
[(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave64\t$dst",
[(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor\t$dst",
[(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor64\t$dst",
[(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt\t$dst",
[(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt64\t$dst",
[(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec\t$dst",
[(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec64\t$dst",
[(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves\t$dst",
[(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves64\t$dst",
[(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors\t$dst",
[(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors64\t$dst",
[(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
def : InstAlias<"xstorerng", (XSTORE)>;
let SchedRW = [WriteSystem] in {
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB;
def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB;
def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB;
def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB;
def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB;
}
let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB;
def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB;
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
} // SchedRW
/*
//==-----------------------------------------------------------------------===//
// PKU - enable protection key
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def WRPKRU : PseudoI<(outs), (ins GR32:$src),
[(int_x86_wrpkru GR32:$src)]>;
def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
[(set GR32:$dst, (int_x86_rdpkru))]>;
}
*/
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
let Uses = [EAX, ECX, EDX] in
def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
"rdfsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
"rdfsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
"rdgsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
"rdgsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS;
def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
"wrfsbase{l}\t$src",
[(int_x86_wrfsbase_32 GR32:$src)]>, XS;
def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
"wrfsbase{q}\t$src",
[(int_x86_wrfsbase_64 GR64:$src)]>, XS;
def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
"wrgsbase{l}\t$src",
[(int_x86_wrgsbase_32 GR32:$src)]>, XS;
def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
"wrgsbase{q}\t$src",
[(int_x86_wrgsbase_64 GR64:$src)]>, XS;
}
//===----------------------------------------------------------------------===//
// INVPCID Instruction
let SchedRW = [WriteSystem] in {
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}",
[(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD,
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW
let Predicates = [In64BitMode, HasINVPCID] in {
// The instruction can only use a 64-bit register as the register argument
// in 64-bit mode, while the intrinsic only accepts a 32-bit argument
// corresponding to it.
// The accepted values for now are 0, 1, 2, 3 anyway (see Intel SDM -- INVPCID
// type), so it doesn't hurt us that one can't supply a 64-bit value here.
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
(INVPCID64
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
addr:$src2)>;
}
//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
} // Uses, Defs
} // SchedRW
//===----------------------------------------------------------------------===//
// TS flag control instruction.
let SchedRW = [WriteSystem] in {
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
}
//===----------------------------------------------------------------------===//
// IF (inside EFLAGS) management instructions.
let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
}
//===----------------------------------------------------------------------===//
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
"rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS,
Requires<[Not64BitMode, HasRDPID]>;
def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS,
Requires<[In64BitMode, HasRDPID]>;
} // SchedRW
let Predicates = [In64BitMode, HasRDPID] in {
// Due to silly instruction definition, we have to compensate for the
// instruction outputting a 64-bit register.
def : Pat<(int_x86_rdpid),
(EXTRACT_SUBREG (RDPID64), sub_32bit)>;
}
//===----------------------------------------------------------------------===//
// PTWRITE Instruction - Write Data to a Processor Trace Packet
let SchedRW = [WriteSystem] in {
def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Platform Configuration instruction
// From ISA docs:
// "This instruction is used to execute functions for configuring platform
// features.
// EAX: Leaf function to be invoked.
// RBX/RCX/RDX: Leaf-specific purpose."
// "Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF,
// AF, OF, and SF are cleared. In case of failure, the failure reason is
// indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared."
// Thus all these mentioned registers are considered clobbered.
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
Requires<[HasPCONFIG]>;
} // SchedRW


@@ -0,0 +1,60 @@
//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel TSX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TSX instructions
def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
let SchedRW = [WriteSystem] in {
//let usesCustomInserter = 1 in
//def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
// "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
// Requires<[HasRTM]>;
let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
"xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
"xbegin\t$dst", []>, OpSize32;
}
// Pseudo instruction to fake the definition of EAX on the fallback code path.
//let isPseudo = 1, Defs = [EAX] in {
//def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
//}
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
"xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW
// HLE prefixes
let SchedRW = [WriteSystem] in {
let isAsmParserOnly = 1 in {
def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
}
} // SchedRW


@@ -0,0 +1,88 @@
//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// VMX instructions
let SchedRW = [WriteSystem] in {
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, PD;
// 0F 01 D4
def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, PS;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayStore
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayLoad
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
"vmxon\t$vmxon", []>, XS;
} // SchedRW


@@ -0,0 +1,511 @@
//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// No op bitconverts
//===----------------------------------------------------------------------===//
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(COPY_TO_REGCLASS FR64:$src, VR128)>;
//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//
// Patterns for insert_subvector/extract_subvector to/from index=0
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT,
SubRegIndex subIdx> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}
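// For illustration, a rough hand-expansion of one instantiation below,
// subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm> (a sketch
// only, written out by substituting the multiclass parameters):
//
//   def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
//             (v4i32 (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
//   def : Pat<(v8i32 (insert_subvector undef, VR128:$src, (iPTR 0))),
//             (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128:$src, sub_xmm))>;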
// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32, VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32, VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(alignedstore (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
def : Pat<(store (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
}
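// For illustration, a rough hand-expansion of one instantiation below,
// subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>,
// where the !cast selects VMOVAPSmr / VMOVUPSmr (a sketch only):
//
//   def : Pat<(alignedstore (v4f32 (extract_subvector (v8f32 VR256X:$src),
//                                                     (iPTR 0))), addr:$dst),
//             (VMOVAPSmr addr:$dst,
//                        (v4f32 (EXTRACT_SUBREG VR256X:$src, sub_xmm)))>;
//   def : Pat<(store (v4f32 (extract_subvector (v8f32 VR256X:$src),
//                                              (iPTR 0))), addr:$dst),
//             (VMOVUPSmr addr:$dst,
//                        (v4f32 (EXTRACT_SUBREG VR256X:$src, sub_xmm)))>;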
let Predicates = [HasAVX, NoVLX] in {
defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of the lower 128 bits.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
v32i8, sub_xmm>;
// Special patterns for storing subvector extracts of the lower 128 bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
v8i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
v16i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
v32i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
v64i8, sub_xmm>;
// Special patterns for storing subvector extracts of the lower 256 bits of a
// 512-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
sub_ymm>;
defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
v8i64, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
v16i32, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
v32i16, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
v64i8, sub_ymm>;
}
// If we're inserting into an all-zeros vector, just use a plain move, which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, ValueType ZeroTy,
SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
(SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}
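// Illustrative expansion (a sketch, derived from the multiclass above): the
// <"APD", VR128, v4f64, v2f64, v8i32, sub_xmm> instantiation below becomes
//
//   def : Pat<(v4f64 (insert_subvector (bitconvert (v8i32 immAllZerosV)),
//                                      (v2f64 VR128:$src), (iPTR 0))),
//             (SUBREG_TO_REG (i64 0),
//                 (v2f64 (VMOVAPDrr VR128:$src)), sub_xmm)>;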
let Predicates = [HasAVX, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}
let Predicates = [HasVLX] in {
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
return isMaskZeroExtended(N);
}]>;
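// Note (an assumption about the C++ side): isMaskZeroExtended is a helper in
// the X86 instruction selector that returns true when the node producing the
// mask value is already known to zero the upper bits of the mask register.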
def maskzeroupperv1i1 : maskzeroupper<v1i1, VK1>;
def maskzeroupperv2i1 : maskzeroupper<v2i1, VK2>;
def maskzeroupperv4i1 : maskzeroupper<v4i1, VK4>;
def maskzeroupperv8i1 : maskzeroupper<v8i1, VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;
// These patterns determine whether we can rely on the upper bits of a mask
// register being zeroed by the previous operation, so that the explicit
// zeroing can be skipped.
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv32i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK32:$src, VK64)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK16)>;
}
let Predicates = [HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
let Predicates = [HasVLX, HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK8)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK8)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK16)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK64)>;
}
// If the upper bits are not known to be zero, we have to fall back to
// explicit zeroing using shifts.
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
(i8 15)), (i8 15))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
(i8 14)), (i8 14))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
(i8 12)), (i8 12))>;
}
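// Worked example for the shift amounts above: for the v4i1-into-v16i1 case,
// the amount is 16 - 4 = 12; shifting the 16-bit mask left by 12 and then
// right by 12 clears bits 4..15, keeping only the low four mask bits.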
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
(i8 8)), (i8 8))>;
}
let Predicates = [HasDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
(i8 7)), (i8 7))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
(i8 6)), (i8 6))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
(i8 4)), (i8 4))>;
}
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v32i1 VK32:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}
let Predicates = [HasBWI, NoDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
(i8 24)), (i8 24))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
(i8 56)), (i8 56))>;
}
let Predicates = [HasBWI, HasDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
(i8 31)), (i8 31))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
(i8 30)), (i8 30))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
(i8 28)), (i8 28))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
(i8 63)), (i8 63))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
(i8 62)), (i8 62))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}


@@ -0,0 +1,446 @@
//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the XOP (eXtended OPerations) instruction set.
//
//===----------------------------------------------------------------------===//
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}
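// Illustrative note: each defm below yields a register form (e.g. VPHSUBWDrr)
// and a memory form (e.g. VPHSUBWDrm) that folds a 128-bit load into the
// operation.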
let ExeDomain = SSEPackedInt in {
defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
}
// Two-operand instructions whose memory form takes a scalar load.
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
Operand memop, ComplexPattern mem_cpat,
X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
SchedWriteFRnd.XMM>;
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
SchedWriteFRnd.XMM>;
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
SchedWriteFRnd.YMM>;
}
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
(vt128 VR128:$src2))))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}
multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
SchedWriteVecShiftImm.XMM>;
defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
SchedWriteVecShiftImm.XMM>;
defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
SchedWriteVecShiftImm.XMM>;
defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
SchedWriteVecShiftImm.XMM>;
}
// Instructions where the second source can be memory, but the third must be
// a register.
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
Sched<[sched]>;
def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd",
int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
defm VPMACSWW : xop4opm2<0x95, "vpmacsww",
int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
defm VPMACSWD : xop4opm2<0x96, "vpmacswd",
int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
defm VPMACSSWW : xop4opm2<0x85, "vpmacssww",
int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd",
int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd",
int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql",
int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh",
int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd",
int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}
// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v8i16 VR128:$src3))),
(VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
(bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
(v2i64 VR128:$src3))),
(VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
(v2i64 VR128:$src3))),
(VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}
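// Worked note (consistent with the comment above): the v8i16 pattern is sound
// because vpmacsww keeps only the low 16 bits of each 16x16 product before
// the add, matching the wrap-around semantics of the ISD mul it replaces.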
// Transform to swizzle the immediate, which helps match a memory operand in
// the first source operand.
def CommuteVPCOMCC : SDNodeXForm<imm, [{
uint8_t Imm = N->getZExtValue() & 0x7;
Imm = X86::getSwappedVPCOMImm(Imm);
return getI8Imm(Imm, SDLoc(N));
}]>;
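// Illustrative example (assuming the usual VPCOM condition encoding):
// swapping the operands maps LT (0) <-> GT (2) and LE (1) <-> GE (3), while
// EQ (4), NEQ (5), FALSE (6) and TRUE (7) map to themselves.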
// Instructions where the second source can be memory and the third must be
// an imm8.
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
imm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))),
imm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
NotMemoryFoldable;
}
}
def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
}
defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>;
defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>;
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8,
SchedWriteVarShuffle.XMM>;
}
// Instructions where either the second or the third source can be memory.
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64,
SchedWriteShuffle.XMM>;
defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64,
SchedWriteShuffle.YMM>, VEX_L;
}
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
X86FoldableSchedWrite sched> {
def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched.Folded, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
v2f64, loadv2f64, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
v4f64, loadv4f64, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
v4f32, loadv4f32, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
v8f32, loadv8f32, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}


@@ -0,0 +1,77 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the available hardware counters for various subtargets.
//
//===----------------------------------------------------------------------===//
let SchedModel = SandyBridgeModel in {
def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
def SBPort23Counter : PfmIssueCounter<SBPort23,
["uops_dispatched_port:port_2",
"uops_dispatched_port:port_3"]>;
def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
}
let SchedModel = HaswellModel in {
def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BroadwellModel in {
def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
}
let SchedModel = SkylakeClientModel in {
def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = SkylakeServerModel in {
def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BtVer2Model in {
def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
}
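// Note (assumption): the counter names above are libpfm event names; they are
// consumed by tools such as llvm-exegesis to measure per-port uop dispatch
// when validating these scheduling models.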


@@ -0,0 +1,17 @@
//=- X86RegisterBank.td - Describe the X86 Banks ---------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the register banks available on X86.
//
//===----------------------------------------------------------------------===//
/// General Purpose Registers: RAX, RCX,...
def GPRRegBank : RegisterBank<"GPR", [GR64]>;
/// Floating Point/Vector Registers
def VECRRegBank : RegisterBank<"VECR", [VR512]>;


@@ -0,0 +1,591 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
// aliases between the registers, and the register classes built out of the
// registers.
//
//===----------------------------------------------------------------------===//
class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
let Namespace = "X86";
let HWEncoding = Enc;
let SubRegs = subregs;
}
// Subregister indices.
let Namespace = "X86" in {
def sub_8bit : SubRegIndex<8>;
def sub_8bit_hi : SubRegIndex<8, 8>;
def sub_8bit_hi_phony : SubRegIndex<8, 8>;
def sub_16bit : SubRegIndex<16>;
def sub_16bit_hi : SubRegIndex<16, 16>;
def sub_32bit : SubRegIndex<32>;
def sub_xmm : SubRegIndex<128>;
def sub_ymm : SubRegIndex<256>;
}
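// Illustrative example: with these indices, AX decomposes into
// sub_8bit = AL (bits 0-7) and sub_8bit_hi = AH (bits 8-15); EAX reaches AX
// via sub_16bit; and ZMM0 reaches YMM0 via sub_ymm, which in turn reaches
// XMM0 via sub_xmm.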
//===----------------------------------------------------------------------===//
// Register definitions...
//
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
// Dwarf numbering is different for 32-bit and 64-bit, and there are
// variations by target as well. Currently the first entry is for X86-64, the
// second is for exception handling (EH) on X86-32/Darwin, and the third is
// the 'generic' one (X86-32/Linux and debug information on X86-32/Darwin).
// 8-bit registers
// Low registers
def AL : X86Reg<"al", 0>;
def DL : X86Reg<"dl", 2>;
def CL : X86Reg<"cl", 1>;
def BL : X86Reg<"bl", 3>;
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
def AH : X86Reg<"ah", 4>;
def DH : X86Reg<"dh", 6>;
def CH : X86Reg<"ch", 5>;
def BH : X86Reg<"bh", 7>;
// X86-64 only, requires REX.
let CostPerUse = 1 in {
def SIL : X86Reg<"sil", 6>;
def DIL : X86Reg<"dil", 7>;
def BPL : X86Reg<"bpl", 5>;
def SPL : X86Reg<"spl", 4>;
def R8B : X86Reg<"r8b", 8>;
def R9B : X86Reg<"r9b", 9>;
def R10B : X86Reg<"r10b", 10>;
def R11B : X86Reg<"r11b", 11>;
def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
}
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
def SIH : X86Reg<"", -1>;
def DIH : X86Reg<"", -1>;
def BPH : X86Reg<"", -1>;
def SPH : X86Reg<"", -1>;
def R8BH : X86Reg<"", -1>;
def R9BH : X86Reg<"", -1>;
def R10BH : X86Reg<"", -1>;
def R11BH : X86Reg<"", -1>;
def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
def HCX : X86Reg<"", -1>;
def HBX : X86Reg<"", -1>;
def HSI : X86Reg<"", -1>;
def HDI : X86Reg<"", -1>;
def HBP : X86Reg<"", -1>;
def HSP : X86Reg<"", -1>;
def HIP : X86Reg<"", -1>;
def R8WH : X86Reg<"", -1>;
def R9WH : X86Reg<"", -1>;
def R10WH : X86Reg<"", -1>;
def R11WH : X86Reg<"", -1>;
def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
}
// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : X86Reg<"ax", 0, [AL,AH]>;
def DX : X86Reg<"dx", 2, [DL,DH]>;
def CX : X86Reg<"cx", 1, [CL,CH]>;
def BX : X86Reg<"bx", 3, [BL,BH]>;
}
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
def SI : X86Reg<"si", 6, [SIL,SIH]>;
def DI : X86Reg<"di", 7, [DIL,DIH]>;
def BP : X86Reg<"bp", 5, [BPL,BPH]>;
def SP : X86Reg<"sp", 4, [SPL,SPH]>;
}
def IP : X86Reg<"ip", 0>;
// X86-64 only, requires REX.
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8W : X86Reg<"r8w", 8, [R8B,R8BH]>;
def R9W : X86Reg<"r9w", 9, [R9B,R9BH]>;
def R10W : X86Reg<"r10w", 10, [R10B,R10BH]>;
def R11W : X86Reg<"r11w", 11, [R11B,R11BH]>;
def R12W : X86Reg<"r12w", 12, [R12B,R12BH]>;
def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
def EAX : X86Reg<"eax", 0, [AX, HAX]>, DwarfRegNum<[-2, 0, 0]>;
def EDX : X86Reg<"edx", 2, [DX, HDX]>, DwarfRegNum<[-2, 2, 2]>;
def ECX : X86Reg<"ecx", 1, [CX, HCX]>, DwarfRegNum<[-2, 1, 1]>;
def EBX : X86Reg<"ebx", 3, [BX, HBX]>, DwarfRegNum<[-2, 3, 3]>;
def ESI : X86Reg<"esi", 6, [SI, HSI]>, DwarfRegNum<[-2, 6, 6]>;
def EDI : X86Reg<"edi", 7, [DI, HDI]>, DwarfRegNum<[-2, 7, 7]>;
def EBP : X86Reg<"ebp", 5, [BP, HBP]>, DwarfRegNum<[-2, 4, 5]>;
def ESP : X86Reg<"esp", 4, [SP, HSP]>, DwarfRegNum<[-2, 5, 4]>;
def EIP : X86Reg<"eip", 0, [IP, HIP]>, DwarfRegNum<[-2, 8, 8]>;
}
// X86-64 only, requires REX
let SubRegIndices = [sub_16bit, sub_16bit_hi], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8D : X86Reg<"r8d", 8, [R8W,R8WH]>;
def R9D : X86Reg<"r9d", 9, [R9W,R9WH]>;
def R10D : X86Reg<"r10d", 10, [R10W,R10WH]>;
def R11D : X86Reg<"r11d", 11, [R11W,R11WH]>;
def R12D : X86Reg<"r12d", 12, [R12W,R12WH]>;
def R13D : X86Reg<"r13d", 13, [R13W,R13WH]>;
def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;
// These also require REX.
let CostPerUse = 1 in {
def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>;
def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>;
def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
}}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;
// Pseudo Floating Point registers
def FP0 : X86Reg<"fp0", 0>;
def FP1 : X86Reg<"fp1", 0>;
def FP2 : X86Reg<"fp2", 0>;
def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
def FP7 : X86Reg<"fp7", 0>;
// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
let CostPerUse = 1 in {
def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>;
def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>;
def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse
// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
foreach Index = 0-31 in {
def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// ZMM Registers, used by AVX-512 instructions.
let SubRegIndices = [sub_ymm] in {
foreach Index = 0-31 in {
def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// Mask Registers, used by AVX-512 instructions.
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>;
def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>;
def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>;
def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>;
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
// Status flags register.
//
// Note that some flags that are commonly thought of as part of the status
// flags register are modeled separately. Typically this is due to instructions
// reading and updating those flags independently of all the others. We don't
// want to create false dependencies between these instructions and so we use
// a separate register to model them.
def EFLAGS : X86Reg<"flags", 0>;
// The direction flag.
def DF : X86Reg<"dirflag", 0>;
// Segment registers
def CS : X86Reg<"cs", 1>;
def DS : X86Reg<"ds", 3>;
def SS : X86Reg<"ss", 2>;
def ES : X86Reg<"es", 0>;
def FS : X86Reg<"fs", 4>;
def GS : X86Reg<"gs", 5>;
// Debug registers
def DR0 : X86Reg<"dr0", 0>;
def DR1 : X86Reg<"dr1", 1>;
def DR2 : X86Reg<"dr2", 2>;
def DR3 : X86Reg<"dr3", 3>;
def DR4 : X86Reg<"dr4", 4>;
def DR5 : X86Reg<"dr5", 5>;
def DR6 : X86Reg<"dr6", 6>;
def DR7 : X86Reg<"dr7", 7>;
def DR8 : X86Reg<"dr8", 8>;
def DR9 : X86Reg<"dr9", 9>;
def DR10 : X86Reg<"dr10", 10>;
def DR11 : X86Reg<"dr11", 11>;
def DR12 : X86Reg<"dr12", 12>;
def DR13 : X86Reg<"dr13", 13>;
def DR14 : X86Reg<"dr14", 14>;
def DR15 : X86Reg<"dr15", 15>;
// Control registers
def CR0 : X86Reg<"cr0", 0>;
def CR1 : X86Reg<"cr1", 1>;
def CR2 : X86Reg<"cr2", 2>;
def CR3 : X86Reg<"cr3", 3>;
def CR4 : X86Reg<"cr4", 4>;
def CR5 : X86Reg<"cr5", 5>;
def CR6 : X86Reg<"cr6", 6>;
def CR7 : X86Reg<"cr7", 7>;
def CR8 : X86Reg<"cr8", 8>;
def CR9 : X86Reg<"cr9", 9>;
def CR10 : X86Reg<"cr10", 10>;
def CR11 : X86Reg<"cr11", 11>;
def CR12 : X86Reg<"cr12", 12>;
def CR13 : X86Reg<"cr13", 13>;
def CR14 : X86Reg<"cr14", 14>;
def CR15 : X86Reg<"cr15", 15>;
// Pseudo index registers
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
// Bound registers, used in MPX instructions
def BND0 : X86Reg<"bnd0", 0>;
def BND1 : X86Reg<"bnd1", 1>;
def BND2 : X86Reg<"bnd2", 2>;
def BND3 : X86Reg<"bnd3", 3>;
// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
//
// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL,
// and R8B ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86-64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
// 64-bit mode. The main complication is that they cannot be encoded in an
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
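// Illustrative note: the AltOrderSelect hook above switches GR8 allocation to
// the alternative order (without AH, BH, CH, DH) in 64-bit mode, where the
// h-registers cannot be encoded together with a REX prefix.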
let isAllocatable = 0 in
def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
R12BH, R13BH, R14BH, R15BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
(add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
R15WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 15)>;
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
R8, R9, R10, R11, RIP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH)> {
let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP)>;
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
// GR64_NOSP - GR64 registers except RSP (and RIP).
def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
(and GR32_NOREX, GR32_NOSP)>;
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86-64 ISA.
// In such cases, it is fine to use RIP as we are sure the 32 high
// bits are not set. We do not need variants for NOSP as RIP is not
// allowed there.
// RIP is not spilled anywhere for now, so stick to 32-bit alignment
// to save on memory space.
// FIXME: We could allow all 64bit registers, but we would need
// something to check that the 32 high bits are not set,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
// When RBP is used as a base pointer in a 32-bit address environment,
// it is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to 32-bit alignment to save
// on memory consumption.
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
// values as 64-bit quantities instead of 80-bit quantities, which is much much
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Special classes that help the assembly parser choose some alternate
// instructions to favor 2-byte VEX encodings.
def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 0, 7)>;
def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 8, 15)>;
def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 7)>;
def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 8, 15)>;
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,49 @@
//===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines scheduling predicate definitions that are common to
// all X86 subtargets.
//
//===----------------------------------------------------------------------===//
// A predicate used to identify dependency-breaking instructions that clear the
// content of the destination register. Note that this predicate only checks
// whether the input registers are the same. It makes no assumptions about
// the expected instruction opcodes, because different processors may implement
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
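// For example (illustrative): "xorps %xmm0, %xmm0" satisfies this predicate,
// since operands 1 and 2 (the two sources) name the same register.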
// A predicate used to check if an instruction is a LEA, and if it uses all
// three source operands: base, index, and offset.
def IsThreeOperandsLEAPredicate : CheckAll<[
  CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
  // isRegOperand(Base)
  CheckIsRegOperand<1>,
  CheckNot<CheckInvalidRegOperand<1>>,
  // isRegOperand(Index)
  CheckIsRegOperand<3>,
  CheckNot<CheckInvalidRegOperand<3>>,
  // hasLEAOffset(Offset)
  CheckAny<[
    CheckAll<[
      CheckIsImmOperand<4>,
      CheckNot<CheckZeroOperand<4>>
    ]>,
    CheckNonPortable<"MI.getOperand(4).isGlobal()">
  ]>
]>;
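// Illustrative examples: "leaq 4(%rdi,%rsi,2), %rax" satisfies this predicate
// (register base, register index, non-zero immediate offset), whereas
// "leaq (%rdi), %rax" does not, since it has neither an index nor an offset.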
// This predicate evaluates to true only if the input machine instruction is
// a three-operand LEA. Tablegen automatically generates a new method for it
// in X86GenInstrInfo.
def IsThreeOperandsLEAFn :
TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
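// A sketch of the generated helper (the exact form emitted into
// X86GenInstrInfo.inc may differ):
//   bool isThreeOperandsLEA(const MachineInstr &MI);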

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,661 @@
//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// InstrSchedModel annotations for out-of-order CPUs.
// Instructions with folded loads need to read the memory operand immediately,
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;
// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
                       list<ProcResourceKind> ExePorts,
                       int Lat, list<int> Res, int UOps> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}
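// Example use (illustrative; taken from the per-CPU models):
//   defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
// expands to a WriteRes for WriteBSWAP32 with Latency = 1,
// ResourceCycles = [1] and NumMicroOps = 1 on AtomPort0.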
// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: with and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
// with a folded load.
class X86FoldableSchedWrite : SchedWrite {
  // The SchedWrite to use when a load is folded into the instruction.
  SchedWrite Folded;
}
// Multiclass that produces a linked pair of SchedWrites.
multiclass X86SchedWritePair {
  // Register-Memory operation.
  def Ld : SchedWrite;
  // Register-Register operation.
  def NAME : X86FoldableSchedWrite {
    let Folded = !cast<SchedWrite>(NAME#"Ld");
  }
}
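// For example, "defm WriteALU : X86SchedWritePair;" (defined below) produces
// WriteALU for the register-register form and WriteALULd for the folded-load
// form, with WriteALU.Folded pointing at WriteALULd.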
// Helpers to mark SchedWrites as unsupported.
multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
  let Unsupported = 1 in {
    def : WriteRes<SchedRW, []>;
  }
}
multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
  let Unsupported = 1 in {
    def : WriteRes<SchedRW, []>;
    def : WriteRes<SchedRW.Folded, []>;
  }
}
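// Example (illustrative): a model for a CPU without FMA support can declare
//   defm : X86WriteResPairUnsupported<WriteFMA>;
// which marks both WriteFMA and its folded-load variant WriteFMALd as
// Unsupported for that processor.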
// Class that wraps X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
                          X86FoldableSchedWrite s128,
                          X86FoldableSchedWrite s256,
                          X86FoldableSchedWrite s512> {
  X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
  X86FoldableSchedWrite MMX = sScl; // MMX operations.
  X86FoldableSchedWrite XMM = s128; // XMM operations.
  X86FoldableSchedWrite YMM = s256; // YMM operations.
  X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}
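// Instruction definitions can then select the write matching their vector
// width, e.g. (illustrative) Sched<[SchedWriteFAdd.XMM]> on a 128-bit add,
// using the SchedWriteFAdd wrapper defined at the end of this file.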
// Class that wraps X86SchedWriteWidths for each fp vector type.
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
                         X86SchedWriteWidths sPD> {
  X86SchedWriteWidths PS = sPS;
  X86SchedWriteWidths PD = sPD;
}
// Class that wraps a move/load/store triple for one vector width.
class X86SchedWriteMoveLS<SchedWrite MoveRR,
                          SchedWrite LoadRM,
                          SchedWrite StoreMR> {
  SchedWrite RR = MoveRR;
  SchedWrite RM = LoadRM;
  SchedWrite MR = StoreMR;
}
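// For example, WriteFMoveLS (defined below) groups WriteFMove, WriteFLoad and
// WriteFStore, so users can refer to WriteFMoveLS.RR, .RM and .MR.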
// Class that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
                                X86SchedWriteMoveLS s128,
                                X86SchedWriteMoveLS s256,
                                X86SchedWriteMoveLS s512> {
  X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
  X86SchedWriteMoveLS MMX = sScl; // MMX operations.
  X86SchedWriteMoveLS XMM = s128; // XMM operations.
  X86SchedWriteMoveLS YMM = s256; // YMM operations.
  X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
}
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
defm WriteDiv16 : X86SchedWritePair;
defm WriteDiv32 : X86SchedWritePair;
defm WriteDiv64 : X86SchedWritePair;
defm WriteIDiv8 : X86SchedWritePair;
defm WriteIDiv16 : X86SchedWritePair;
defm WriteIDiv32 : X86SchedWritePair;
defm WriteIDiv64 : X86SchedWritePair;
defm WriteBSF : X86SchedWritePair; // Bit scan forward.
defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV : X86SchedWritePair; // Conditional move.
defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def WriteZero : SchedWrite;
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
def WriteFLD0 : SchedWrite;
def WriteFLD1 : SchedWrite;
def WriteFLDC : SchedWrite;
def WriteFLoad : SchedWrite;
def WriteFLoadX : SchedWrite;
def WriteFLoadY : SchedWrite;
def WriteFMaskedLoad : SchedWrite;
def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair;
defm WriteFHAddZ : X86SchedWritePair;
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddX : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair;
defm WritePHAddZ : X86SchedWritePair;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
def WriteVecLoadNT : SchedWrite;
def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;
def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair;
// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;
// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;
// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;
// Fence instructions.
def WriteFence : SchedWrite;
// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;
// Move/Load/Store wrappers.
def WriteFMoveLS
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
def WriteFMoveLSX
: X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
def WriteFMoveLSNTX
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
def SchedWriteFMoveLSNT
: X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
WriteFMoveLSNTY, WriteFMoveLSNTY>;
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
: X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
: X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
: X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
: X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
: X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
: X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
WriteFSqrtY, WriteFSqrtZ>;
def SchedWriteFSqrt64
: X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
: X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendZ>;
def SchedWriteCvtDQ2PD
: X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
: X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
: X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
: X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
WriteCvtPD2PSY, WriteCvtPD2PSZ>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
: X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
: X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWZ>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlendY, WriteVarBlendZ>;
// Vector size wrappers.
def SchedWriteFAddSizes
: X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
: X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
: X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
: X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
: X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
: X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
: X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
// IssueWidth is analogous to the number of decode units. Core and its
// descendants, including Nehalem and SandyBridge, have 4 decoders.
// Resources beyond the decoder operate on micro-ops and are buffered
// so adjacent micro-ops don't directly compete.
//
// MicroOpBufferSize > 1 indicates that RAW dependencies can be
// decoded in the same cycle. The value 32 is a reasonable, if arbitrary,
// number of in-flight instructions.
//
// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
// indicates high latency opcodes. Alternatively, InstrItinData
// entries may be included here to define specific operand
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
// The GenericX86Model contains no instruction schedules
// and disables PostRAScheduler.
class GenericX86Model : SchedMachineModel {
let IssueWidth = 4;
let MicroOpBufferSize = 32;
let LoadLatency = 4;
let HighLatency = 10;
let PostRAScheduler = 0;
let CompleteModel = 0;
}
def GenericModel : GenericX86Model;
// Define a model with the PostRAScheduler enabled.
def GenericPostRAModel : GenericX86Model {
let PostRAScheduler = 1;
}


@@ -0,0 +1,917 @@
//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the schedule class data for the Intel Atom
// in-order (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//
//
// Scheduling information derived from the "Intel 64 and IA-32 Architectures
// Optimization Reference Manual", Chapter 13, Section 4.
// Atom machine model.
def AtomModel : SchedMachineModel {
  let IssueWidth = 2;        // Allows 2 instructions per scheduling group.
  let MicroOpBufferSize = 0; // In-order execution, always hide latency.
  let LoadLatency = 3;       // Expected cycles, may be overridden.
  let HighLatency = 30;      // Expected, may be overridden.
  // On the Atom, the throughput for taken branches is 2 cycles. For small
  // simple loops, expand by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;
  let PostRAScheduler = 1;
  let CompleteModel = 0;
}
let SchedModel = AtomModel in {
// Functional Units
def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
                                 // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
                                 // SIMD/FP: SIMD ALU, FP Adder
def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
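// Illustrative consequence: in "addl (%rdi), %eax", the %eax operand is not
// needed until 3 cycles after the load micro-op starts, so its producer may
// still be in flight when the add is issued.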
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> RRPorts,
                            list<ProcResourceKind> RMPorts,
                            int RRLat = 1, int RMLat = 1,
                            list<int> RRRes = [1],
                            list<int> RMRes = [1]> {
  // Register variant takes RRLat cycles on the given ports.
  def : WriteRes<SchedRW, RRPorts> {
    let Latency = RRLat;
    let ResourceCycles = RRRes;
  }
  // Memory variant takes RMLat cycles; on Atom, a folded load typically adds
  // 3 cycles to the latency.
  def : WriteRes<SchedRW.Folded, RMPorts> {
    let Latency = RMLat;
    let ResourceCycles = RMRes;
  }
}
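// Example use (illustrative; see the arithmetic section below):
//   defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
// defines both variants of WriteFAdd with a latency of 5 cycles, each
// occupying AtomPort0 for 5 cycles.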
// A folded store needs a cycle on Port0 for the store data.
def : WriteRes<WriteRMW, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : X86WriteResPairUnsupported<WriteCRC32>;
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [AtomPort01]>;
def : WriteRes<WriteSETCCStore, [AtomPort01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteBitTest, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
IMUL64rmi8, IMUL64rmi32)>;
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [AtomPort0]>;
def : WriteRes<WriteStore, [AtomPort0]>;
def : WriteRes<WriteStoreNT, [AtomPort0]>;
def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteFence, [AtomPort0]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [AtomPort01]>;
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [AtomPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [AtomPort01], 6, [6], 1>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFLoadY>;
defm : X86WriteResUnsupported<WriteFMaskedLoad>;
defm : X86WriteResUnsupported<WriteFMaskedLoadY>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
defm : X86WriteResUnsupported<WriteFMaskedStore>;
defm : X86WriteResUnsupported<WriteFMaskedStoreY>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadY>;
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadNTY>;
defm : X86WriteResUnsupported<WriteVecMaskedLoad>;
defm : X86WriteResUnsupported<WriteVecMaskedLoadY>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : X86WriteResPairUnsupported<WritePMULLD>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : X86WriteResPairUnsupported<WritePHMINPOS>;
defm : X86WriteResPairUnsupported<WriteMPSAD>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
def : WriteRes<WriteVecExtract, [AtomPort0]>;
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WritePCmpIStrI>;
defm : X86WriteResPairUnsupported<WritePCmpIStrM>;
defm : X86WriteResPairUnsupported<WritePCmpEStrI>;
defm : X86WriteResPairUnsupported<WritePCmpEStrM>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
////////////////////////////////////////////////////////////////////////////////
// AES instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteAESIMC>;
defm : X86WriteResPairUnsupported<WriteAESKeyGen>;
defm : X86WriteResPairUnsupported<WriteAESDecEnc>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteCLMul>;
////////////////////////////////////////////////////////////////////////////////
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }
////////////////////////////////////////////////////////////////////////////////
// Special Cases.
////////////////////////////////////////////////////////////////////////////////
// Port0
def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr,
MOVSX64rr32)>;
def : SchedAlias<WriteALURMW, AtomWrite0_1>;
def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
"BT(C|R|S)?(16|32|64)(rr|ri8)")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
// Port0 and Port1
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1, 1];
}
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
POP16rmr, POP32rmr, POP64rmr,
PUSH16r, PUSH32r, PUSH64r,
PUSHi16, PUSHi32,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
"IRET(16|32|64)?")>;
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5, 5];
}
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
MOVSSrr, MOVSSrr_REV,
PSLLDQri, PSRLDQri)>;
def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
"MMX_PUNPCKH(BW|DQ|WD)irr")>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
"XCHG(8|16|32|64)(ar|rr)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
let ResourceCycles = [3];
}
def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
CMPSB, CMPSL, CMPSQ, CMPSW,
MOVSB, MOVSL, MOVSQ, MOVSW,
POP16rmm, POP32rmm, POP64rmm)>;
def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
"XCHG(8|16|32|64)rm",
"PH(ADD|SUB)Drr",
"MOV(S|Z)X16rm8",
"MMX_P(ADD|SUB)Qirm",
"MOV(UPS|UPD|DQU)rm",
"P(ADD|SUB)Qrm")>;
def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
let Latency = 4;
let ResourceCycles = [4];
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;
def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rrCL, SHRD16rrCL,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
"IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
let Latency = 7;
let ResourceCycles = [7];
}
def : InstRW<[AtomWrite01_7], (instrs AAD8i8)>;
def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWrite01_8], (instrs LOOPE,
PUSHA16, PUSHA32,
SHLD64rrCL, SHRD64rrCL,
FNSTCW16m)>;
def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
"(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
"CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
let Latency = 15;
let ResourceCycles = [15];
}
def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
let Latency = 18;
let ResourceCycles = [18];
}
def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
let Latency = 20;
let ResourceCycles = [20];
}
def : InstRW<[AtomWrite01_20], (instrs DAS)>;
def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
let Latency = 22;
let ResourceCycles = [22];
}
def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
let Latency = 23;
let ResourceCycles = [23];
}
def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
let Latency = 25;
let ResourceCycles = [25];
}
def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
let Latency = 26;
let ResourceCycles = [26];
}
def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
let Latency = 29;
let ResourceCycles = [29];
}
def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
let Latency = 30;
let ResourceCycles = [30];
}
def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
let Latency = 32;
let ResourceCycles = [32];
}
def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
let ResourceCycles = [45];
}
def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
let ResourceCycles = [46];
}
def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
let Latency = 48;
let ResourceCycles = [48];
}
def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
let Latency = 55;
let ResourceCycles = [55];
}
def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
let Latency = 59;
let ResourceCycles = [59];
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
let ResourceCycles = [63];
}
def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
let Latency = 68;
let ResourceCycles = [68];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
let ResourceCycles = [71];
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
INVLPG, INVLPGA32, INVLPGA64)>;
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
let Latency = 72;
let ResourceCycles = [72];
}
def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
let Latency = 74;
let ResourceCycles = [74];
}
def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
let Latency = 77;
let ResourceCycles = [77];
}
def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
let Latency = 78;
let ResourceCycles = [78];
}
def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
let ResourceCycles = [79];
}
def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
"LRETI?(L|Q|W)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
let ResourceCycles = [92];
}
def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
let Latency = 94;
let ResourceCycles = [94];
}
def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
let Latency = 99;
let ResourceCycles = [99];
}
def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
let Latency = 121;
let ResourceCycles = [121];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
let ResourceCycles = [127];
}
def : InstRW<[AtomWrite01_127], (instrs INT)>;
def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
let Latency = 130;
let ResourceCycles = [130];
}
def : InstRW<[AtomWrite01_130], (instrs INT3)>;
def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
let Latency = 140;
let ResourceCycles = [140];
}
def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
let Latency = 141;
let ResourceCycles = [141];
}
def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
let Latency = 146;
let ResourceCycles = [146];
}
def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
let Latency = 147;
let ResourceCycles = [147];
}
def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
let Latency = 168;
let ResourceCycles = [168];
}
def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
let Latency = 174;
let ResourceCycles = [174];
}
def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
let Latency = 183;
let ResourceCycles = [183];
}
def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
let Latency = 202;
let ResourceCycles = [202];
}
def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
} // SchedModel


@@ -0,0 +1,682 @@
//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD btver2 (Jaguar) to support
// instruction scheduling and other instruction cost heuristics. Based on the
// AMD Software Optimization Guide for AMD Family 16h Processors and its
// instruction latency appendix.
//
//===----------------------------------------------------------------------===//
def BtVer2Model : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and btver2 can
// decode 2 instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 64; // Retire Control Unit
let LoadLatency = 5; // FPU load latency (worst case, cf. the 3-cycle integer load latency)
let HighLatency = 25;
let MispredictPenalty = 14; // Minimum branch misprediction penalty
let PostRAScheduler = 1;
// FIXME: SSE4/AVX is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = BtVer2Model in {
// Jaguar can issue up to 6 micro-ops in one cycle
def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handle FP->INT jam)
def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// The Integer PRF for Jaguar is 64 entries, and it holds the architectural and
// speculative versions of the 64-bit integer registers.
// Reference: www.realworldtech.com/jaguar/4/
//
// The processor always keeps the different parts of an integer register
// together. An instruction that writes to a part of a register will therefore
// have a false dependence on any previous write to the same register or any
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
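// Illustrative consequence of the above (an example, not from the guide): a
// write to %al cannot complete independently of an earlier in-flight write to
// %eax/%rax, because the sub-registers are not renamed separately.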
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
def JFpuPRF : RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
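// Because 256-bit operations are cracked into two COPs, the YMM writes below
// (e.g. WriteFAddY) generally use two micro-ops and double the resource
// cycles of their 128-bit counterparts.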
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
// Reference: "Software Optimization Guide for AMD Family 16h Processors"
def JRCU : RetireControlUnit<64, 2>;
// Integer Pipe Scheduler
def JALU01 : ProcResGroup<[JALU0, JALU1]> {
let BufferSize=20;
}
// AGU Pipe Scheduler
def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
let BufferSize=12;
}
// Fpu Pipe Scheduler
def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
let BufferSize=18;
}
// Functional units
def JDiv : ProcResource<1>; // integer division
def JMul : ProcResource<1>; // integer multiplication
def JVALU0 : ProcResource<1>; // vector integer
def JVALU1 : ProcResource<1>; // vector integer
def JVIMUL : ProcResource<1>; // vector integer multiplication
def JSTC : ProcResource<1>; // vector store/convert
def JFPM : ProcResource<1>; // FP multiplication
def JFPA : ProcResource<1>; // FP addition
// Functional unit groups
def JFPX : ProcResGroup<[JFPA, JFPM]>;
def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
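// For example, in "addl (%rdi), %eax" the load begins immediately, so the
// %eax input only needs to be ready 3 cycles after the micro-op issues; an
// in-flight producer of %eax with latency <= 3 causes no extra stall.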
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
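// As a sketch, "defm : JWriteResIntPair<WriteALU, [JALU01], 1>;" below
// expands to roughly:
//   def : WriteRes<WriteALU,   [JALU01]>        { let Latency = 1; }
//   def : WriteRes<WriteALULd, [JLAGU, JALU01]> { let Latency = 4; }
// i.e. the folded-load variant adds a JLAGU cycle and 3 cycles of latency.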
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [2], int UOps = 2> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
let NumMicroOps = UOps;
}
}
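// Sketch: "defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;"
// below yields a register variant with Latency = 3 and a memory variant with
// Latency = 8 and ResourceCycles = [2, 2, 2] (two JLAGU cycles for the
// 32-byte load, prepended to the YMM execution resources).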
// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteIDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
// BMI1 BEXTR, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore, [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copied from WriteLoad/WriteStore.
def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteSTMXCSR, [JSAGU]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteJump, [JALU01], 1>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : JWriteResFpuPair<WriteFDiv64, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : JWriteResFpuPair<WriteFSqrt64, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResFpuPair<WriteFSqrt64X, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResYMMPair<WriteFSqrt64Y, [JFPU1, JFPM], 54, [2, 54], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>;
defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecInsertLd, [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;
////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteINSERTQ : SchedWriteRes<[JFPU01, JVALU]> {
let Latency = 2;
let ResourceCycles = [1, 4];
}
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteVBROADCASTYLd : SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
def JWriteJVZEROALL : SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;
}
def : InstRW<[JWriteJVZEROALL], (instrs VZEROALL)>;
def JWriteJVZEROUPPER : SchedWriteRes<[]> {
let Latency = 46;
let NumMicroOps = 37;
}
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////
def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.
// Reference: Section 10.8 of the "Software Optimization Guide for AMD Family
// 15h Processors".
// Reference: Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// Section 21.8 [Dependency-breaking instructions].
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
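// For example, "xorl %eax, %eax" (XOR32rr with identical source and
// destination) matches ZeroIdiomPredicate and completes with zero latency and
// no execution resources, while "xorl %ebx, %eax" falls back to WriteALU.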
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
}
// On Jaguar, a slow LEA is either a three-operand LEA (base, index, offset),
// or an LEA with a "Scale" value other than 1.
def JSlowLEAPredicate : MCSchedPredicate<
CheckAny<[
// A 3-operand LEA (base, index, offset).
IsThreeOperandsLEAFn,
// An LEA with a "Scale" other than 1.
CheckAll<[
CheckIsImmOperand<2>,
CheckNot<CheckImmOperand<2, 1>>
]>
]>
>;
def JWriteLEA : SchedWriteVariant<[
SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
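// Illustrative classification under JSlowLEAPredicate:
//   leal 4(%rax,%rbx), %ecx   ; base + index + offset -> JWrite3OpsLEA
//   leal (%rax,%rbx,2), %ecx  ; scale != 1            -> JWrite3OpsLEA
//   leal 8(%rax), %ecx        ; simple                -> WriteLEA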
def JSlowLEA16r : SchedWriteRes<[JALU01]> {
let Latency = 3;
let ResourceCycles = [4];
}
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
} // SchedModel


@@ -0,0 +1,486 @@
//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def SLMModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and SLM can decode 2
// instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 32; // Based on the reorder buffer.
let LoadLatency = 3;
let MispredictPenalty = 10;
let PostRAScheduler = 1;
// For small loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
// FIXME: SSE4 is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = SLMModel in {
// Silvermont has 5 reservation stations for micro-ops
def SLM_IEC_RSV0 : ProcResource<1>;
def SLM_IEC_RSV1 : ProcResource<1>;
def SLM_FPC_RSV0 : ProcResource<1> { let BufferSize = 1; }
def SLM_FPC_RSV1 : ProcResource<1> { let BufferSize = 1; }
def SLM_MEC_RSV : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
def SLM_IEC_RSV01 : ProcResGroup<[SLM_IEC_RSV0, SLM_IEC_RSV1]>;
def SLM_FPC_RSV01 : ProcResGroup<[SLM_FPC_RSV0, SLM_FPC_RSV1]>;
def SLMDivider : ProcResource<1>;
def SLMFPMultiplier : ProcResource<1>;
def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
int LoadLat = 3> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
// the latency (default = 3).
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
let NumMicroOps = UOps;
}
}
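// As a sketch, "defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;" below
// expands to roughly:
//   def : WriteRes<WriteALU,   [SLM_IEC_RSV01]>              { let Latency = 1; }
//   def : WriteRes<WriteALULd, [SLM_MEC_RSV, SLM_IEC_RSV01]> { let Latency = 4; }
// with the folded variant spending one extra cycle on SLM_MEC_RSV.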
// A folded store needs a cycle on MEC_RSV for the store data, but it does not
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copied from WriteStore/WriteLoad.
def : WriteRes<WriteSTMXCSR, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLDMXCSR, [SLM_MEC_RSV]> { let Latency = 3; }
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME: Latency and NumMicroOps?
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
// Bit counts.
defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLDC, [SLM_FPC_RSV01], 1, [2], 2>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
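// In WriteRes records like the one above, ResourceCycles pairs positionally
// with the resource list: [1, 2] means one cycle on SLM_FPC_RSV0 and two
// cycles on SLM_MEC_RSV per extract-to-memory.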
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SLM_FPC_RSV0]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 13;
let ResourceCycles = [13, 1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SLM_FPC_RSV0]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 21;
let ResourceCycles = [21, 1];
}
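// Each string-compare write type above follows the same shape: the register
// form ties up SLM_FPC_RSV0 for its full latency, while the folded-load
// (*Ld) form adds a single cycle on SLM_MEC_RSV on top of the same pipe
// usage.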
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSKY, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESDecEncLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESIMC, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESIMCLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESKeyGen, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESKeyGenLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SLM_FPC_RSV0]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 10;
let ResourceCycles = [10, 1];
}
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence, [SLM_MEC_RSV]>;
def : WriteRes<WriteNop, []>;
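// The 100-cycle latencies on WriteSystem and WriteMicrocoded read as
// conservative placeholders rather than measured values.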
// AVX/FMA is not supported on this architecture, but we define the basic
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
} // SchedModel

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,459 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
"32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87", "HasX87", "true",
"Enable X87 float instructions">;
def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
"Enable NOPL instruction">;
def FeatureCMOV : SubtargetFeature<"cmov", "HasCMov", "true",
"Enable conditional move instructions">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
"Support fxsave/fxrestore instructions">;
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
"Support xsave instructions">;
def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
"Support xsaveopt instructions">;
def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
"Support xsavec instructions">;
def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
[FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
"Enable SSE3 instructions",
[FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
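// Implied features are resolved transitively, so requesting one SSE level
// pulls in everything below it. For example (an illustrative llc invocation,
// not part of this file):
//   llc -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2 foo.ll
// enables SSE4.1, SSSE3, SSE3, SSE2, SSE1, and CMOV as well.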
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
"Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
"Enable AVX instructions",
[FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
"Enable three-operand fused multiple-add",
[FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
"Enable AVX-512 Byte and Word Instructions",
[FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
"Enable AVX-512 further Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
"Enable Galois Field Arithmetic Instructions",
[FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
"Enable vpclmulqdq instructions",
[FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
"Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
"Promote selected AES instructions to AVX512/AVX registers",
[FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
"Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
"Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
// On recent X86 (port-bound) processors, it's preferable to combine multiple
// fixed shuffles into a single shuffle using a variable mask.
def FeatureFastVariableShuffle
: SubtargetFeature<"fast-variable-shuffle",
"HasFastVariableShuffle",
"true", "Shuffles with variable masks are fast">;
// On some X86 processors, there is no performance hazard to writing only the
// lower parts of a YMM or ZMM register without clearing the upper part.
def FeatureFastPartialYMMorZMMWrite
: SubtargetFeature<"fast-partial-ymm-or-zmm-write",
"HasFastPartialYMMorZMMWrite",
"true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def FeatureFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REP MOVS{W,D,Q}.
def FeatureERMSB
: SubtargetFeature<
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
// Gather has been available since Haswell (the AVX2 set), so technically we
// can generate gathers on all AVX2 processors, but the overhead on Haswell
// is high. Skylake Client has faster gathers than Haswell, with performance
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast.">;
def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
// Enable mitigation of some aspects of speculative execution related
// vulnerabilities by removing speculatable indirect branches. This disables
// jump-table formation, rewrites explicit `indirectbr` instructions into
// `switch` instructions, and uses a special construct called a "retpoline" to
// prevent speculation of the remaining indirect branches (indirect calls and
// tail calls).
def FeatureRetpoline
: SubtargetFeature<"retpoline", "UseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
"lowering them with a speculation blocking construct.">;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
"Enable retpoline, but with an externally provided thunk.",
[FeatureRetpoline]>;
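// With retpoline-external-thunk, indirect branches are still rewritten, but
// calls target a user-supplied thunk symbol instead of a compiler-emitted
// one (on x86-64 the conventional name is __llvm_retpoline_r11; treat the
// exact symbol set as a property of the LLVM version in use).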
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "X86Schedule.td"
include "X86InstrInfo_reduce.td"
def X86InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Variant name.
string Name = "att";
// Discard comments in assembly strings.
string CommentDelimiter = "#";
// Recognize hard coded registers.
string RegisterPrefix = "%";
}
def IntelAsmParserVariant : AsmParserVariant {
int Variant = 1;
// Variant name.
string Name = "intel";
// Discard comments in assembly strings.
string CommentDelimiter = ";";
// Recognize hard coded registers.
string RegisterPrefix = "";
}
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
// The X86 target supports two different assembly syntaxes for printing
// instructions. This is controlled by the -x86-asm-syntax={att|intel} flag.
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
let AllowRegisterRenaming = 1;
}
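// How this description is typically consumed (illustrative commands; the
// include paths depend on the build tree):
//   llvm-tblgen -gen-subtarget  -I llvm/include X86.td -o X86GenSubtargetInfo.inc
//   llvm-tblgen -gen-instr-info -I llvm/include X86.td -o X86GenInstrInfo.inc
//   llvm-tblgen -gen-asm-writer -I llvm/include X86.td -o X86GenAsmWriter.inc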