Mirror of https://github.com/hedge-dev/XenonRecomp.git, synced 2025-12-19 04:47:22 +00:00

Commit: Add missing thirdparty files
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86.td | 1203 lines added (vendored, new file; diff suppressed because it is too large)
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86CallingConv.td | 1150 lines added (vendored, new file; diff suppressed because it is too large)

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86Capstone.td | 7 lines added (vendored, new file)
@@ -0,0 +1,7 @@
// Capstone definitions fix for X86 LLVM instructions.

let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;

// def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", [], IIC_FNCLEX>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;
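
A note on the TableGen idiom above: the "let Defs = [EFLAGS] in" prefix applies only to the definition that immediately follows it, marking INT1 as clobbering EFLAGS. A minimal sketch of the equivalent explicit form (illustration only; not from the committed file):

// Same record as above, with Defs overridden inside the record body
// instead of via the outer `let ... in` prefix.
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []> {
  let Defs = [EFLAGS];
}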

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86CapstoneFull.td | 103 lines added (vendored, new file)
@@ -0,0 +1,103 @@
// Capstone definitions fix for X86 LLVM instructions.

let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;

def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;

// Capstone: comment out below lines for X86 Reduce mode

/*
// X87 Floating Point Stack.
include "X86InstrFPStack.td"

// SIMD support (SSE, MMX and AVX)
include "X86InstrFragmentsSIMD.td"

// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"

// XOP
include "X86InstrXOP.td"

// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrAVX512.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"

// MPX instructions
include "X86InstrMPX.td"

//include "X86InstrTSX.td"
include "X86InstrSGX.td"

// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
def : InstAlias<"fxch", (XCH_F ST1), 0>;
def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;

// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
  def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
                  (Inst RST:$op), EmitAlias>;
  def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
                  (Inst ST0), EmitAlias>;
}

defm : FpUnaryAlias<"fadd", ADD_FST0r>;
defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
defm : FpUnaryAlias<"fsub", SUB_FST0r>;
defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
defm : FpUnaryAlias<"fmul", MUL_FST0r>;
defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
defm : FpUnaryAlias<"fcompi", COM_FIPr>;
defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;


// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;

def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;

// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
// which supports this due to an old AMD documentation bug when 64-bit mode was
// created.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
                (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
*/
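
For readers unfamiliar with TableGen multiclasses: each anonymous "defm :" above instantiates both defs inside FpUnaryAlias, with the mnemonic spliced in by !strconcat and EmitAlias defaulting to 1. A sketch of what, for example, defm : FpUnaryAlias<"fadd", ADD_FST0r>; expands to (illustration only; not from the committed file):

// Expansion of defm : FpUnaryAlias<"fadd", ADD_FST0r>; (EmitAlias = 1):
def : InstAlias<"fadd\t{$op, %st(0)|st(0), $op}", (ADD_FST0r RST:$op), 1>;
def : InstAlias<"fadd\t{%st(0), %st(0)|st(0), st(0)}", (ADD_FST0r ST0), 1>;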

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86CapstoneReduce.td | 101 lines added (vendored, new file)
@@ -0,0 +1,101 @@
// Capstone definitions fix for X86 LLVM instructions.

let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;

def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;

// Capstone: comment out below lines for X86 Reduce mode

// X87 Floating Point Stack.
//include "X86InstrFPStack.td"

// SIMD support (SSE, MMX and AVX)
//include "X86InstrFragmentsSIMD.td"

// FMA - Fused Multiply-Add support (requires FMA)
//include "X86InstrFMA.td"

// XOP
//include "X86InstrXOP.td"

// SSE, MMX and 3DNow! vector support.
//include "X86InstrSSE.td"
//include "X86InstrAVX512.td"
//include "X86InstrMMX.td"
//include "X86Instr3DNow.td"

// MPX instructions
//include "X86InstrMPX.td"

//include "X86InstrTSX.td"
//include "X86InstrSGX.td"

// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
//def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
//def : InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
//def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
//def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
//def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
//def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
//def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
//def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
//def : InstAlias<"fxch", (XCH_F ST1), 0>;
//def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
//def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
//def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
//def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
//def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
//def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
//def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
//def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;

// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
  def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
                  (Inst RST:$op), EmitAlias>;
  def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
                  (Inst ST0), EmitAlias>;
}

//defm : FpUnaryAlias<"fadd", ADD_FST0r>;
//defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
//defm : FpUnaryAlias<"fsub", SUB_FST0r>;
//defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
//defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
//defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
//defm : FpUnaryAlias<"fmul", MUL_FST0r>;
//defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
//defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
//defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
//defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
//defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
//defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
//defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
//defm : FpUnaryAlias<"fcompi", COM_FIPr>;
//defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;


// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
//def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
//def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
//def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
//def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
//def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
//def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
//
//def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;

// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
// which supports this due to an old AMD documentation bug when 64-bit mode was
// created.
//def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
//                (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
//def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
//                (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86Instr3DNow.td | 111 lines added (vendored, new file)
@@ -0,0 +1,111 @@
//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the 3DNow! instruction set, which extends MMX to support
// floating point and also adds a few more random instructions for good measure.
//
//===----------------------------------------------------------------------===//

class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
      : I<o, F, outs, ins, asm, pat>, Requires<[Has3DNow]> {
}

class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
      : I3DNow<o, F, (outs VR64:$dst), ins,
               !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>, ThreeDNow {
  let Constraints = "$src1 = $dst";
}

class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
      : I3DNow<o, F, (outs VR64:$dst), ins,
               !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, ThreeDNow;

multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
                               X86FoldableSchedWrite sched, bit Commutable = 0,
                               string Ver = ""> {
  let isCommutable = Commutable in
  def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
      [(set VR64:$dst, (!cast<Intrinsic>(
        !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>,
      Sched<[sched]>;
  def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
      [(set VR64:$dst, (!cast<Intrinsic>(
        !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
          (bitconvert (load_mmx addr:$src2))))]>,
      Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
                              X86FoldableSchedWrite sched, string Ver = ""> {
  def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
      [(set VR64:$dst, (!cast<Intrinsic>(
        !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>,
      Sched<[sched]>;
  def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
      [(set VR64:$dst, (!cast<Intrinsic>(
        !strconcat("int_x86_3dnow", Ver, "_", Mn))
          (bitconvert (load_mmx addr:$src))))]>,
      Sched<[sched.Folded, ReadAfterLd]>;
}

defm PAVGUSB  : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
defm PF2ID    : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>;
defm PFACC    : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>;
defm PFADD    : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>;
defm PFCMPEQ  : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>;
defm PFCMPGE  : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>;
defm PFCMPGT  : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>;
defm PFMAX    : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>;
defm PFMIN    : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>;
defm PFMUL    : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>;
defm PFRCP    : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>;
defm PFRSQRT  : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>;
defm PFSUB    : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>;
defm PFSUBR   : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD    : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW  : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;

let SchedRW = [WriteEMMS] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
                   [(int_x86_mmx_femms)]>, TB;

// When PREFETCHWT1 is supported we want to use it for everything but T0.
def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
  return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
}]>;

// Use PREFETCHWT1 for NTA, T2, T1.
def PrefetchWT1Level : ImmLeaf<i32, [{
  return Imm < 3;
}]>;

let SchedRW = [WriteLoad] in {
let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
                      "prefetch\t$addr",
                      [(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;

def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
                  [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
                  TB, Requires<[HasPrefetchW]>;

def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
                    [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
                    TB, Requires<[HasPREFETCHWT1]>;
}

// "3DNowA" instructions
defm PF2IW    : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">;
defm PI2FW    : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">;
defm PFNACC   : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">;
defm PFPNACC  : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">;
defm PSWAPD   : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">;
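
To make the multiclass machinery above concrete: a single defm line instantiates both a register-register and a register-memory form, with the intrinsic name assembled by !strconcat. Roughly what defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>; produces is sketched below (illustration only, isCommutable elided; not from the committed file):

// PAVGUSBrr: register form, selects the int_x86_3dnow_pavgusb intrinsic.
def PAVGUSBrr : I3DNow_binop<0xBF, MRMSrcReg, (ins VR64:$src1, VR64:$src2),
    "pavgusb",
    [(set VR64:$dst, (int_x86_3dnow_pavgusb VR64:$src1, VR64:$src2))]>,
    Sched<[SchedWriteVecALU.MMX]>;
// PAVGUSBrm: memory form folding the second operand from an i64 load.
def PAVGUSBrm : I3DNow_binop<0xBF, MRMSrcMem, (ins VR64:$src1, i64mem:$src2),
    "pavgusb",
    [(set VR64:$dst, (int_x86_3dnow_pavgusb VR64:$src1,
                       (bitconvert (load_mmx addr:$src2))))]>,
    Sched<[SchedWriteVecALU.MMX.Folded, ReadAfterLd]>;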

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrAVX512.td | 11968 lines added (vendored, new file; diff suppressed because it is too large)
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrArithmetic.td | 1338 lines added (vendored, new file; diff suppressed because it is too large)

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrCMovSetCC.td | 116 lines added (vendored, new file)
@@ -0,0 +1,116 @@
//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
// instructions.
//
//===----------------------------------------------------------------------===//


// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
                PatLeaf CondNode> {
  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
      isCommutable = 1, SchedRW = [Sched] in {
    def NAME#16rr
      : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
          [(set GR16:$dst,
                (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
          TB, OpSize16;
    def NAME#32rr
      : I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
          [(set GR32:$dst,
                (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
          TB, OpSize32;
    def NAME#64rr
      : RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
           !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
           [(set GR64:$dst,
                 (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
  }

  let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
      SchedRW = [Sched.Folded, ReadAfterLd] in {
    def NAME#16rm
      : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
          !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
          [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
                                    CondNode, EFLAGS))]>, TB, OpSize16;
    def NAME#32rm
      : I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
          !strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
          [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                    CondNode, EFLAGS))]>, TB, OpSize32;
    def NAME#64rm
      : RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
           !strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
           [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
                                     CondNode, EFLAGS))]>, TB;
  } // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass


// Conditional Moves.
defm CMOVO  : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>;
defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>;
defm CMOVB  : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>;
defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>;
defm CMOVE  : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>;
defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>;
defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
defm CMOVA  : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
defm CMOVS  : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>;
defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>;
defm CMOVP  : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>;
defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>;
defm CMOVL  : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>;
defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>;
defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>;
defm CMOVG  : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>;


// SetCC instructions.
multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
  let Uses = [EFLAGS] in {
    def r : I<opc, MRMXr, (outs GR8:$dst), (ins),
              !strconcat(Mnemonic, "\t$dst"),
              [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
              TB, Sched<[WriteSETCC]>;
    def m : I<opc, MRMXm, (outs), (ins i8mem:$dst),
              !strconcat(Mnemonic, "\t$dst"),
              [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
              TB, Sched<[WriteSETCCStore]>;
  } // Uses = [EFLAGS]
}

defm SETO  : SETCC<0x90, "seto",  X86_COND_O>;  // is overflow bit set
defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
defm SETB  : SETCC<0x92, "setb",  X86_COND_B>;  // unsigned less than
defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
defm SETE  : SETCC<0x94, "sete",  X86_COND_E>;  // equal to
defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
defm SETA  : SETCC<0x97, "seta",  X86_COND_A>;  // unsigned greater than
defm SETS  : SETCC<0x98, "sets",  X86_COND_S>;  // is signed bit set
defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
defm SETP  : SETCC<0x9A, "setp",  X86_COND_P>;  // is parity bit set
defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
defm SETL  : SETCC<0x9C, "setl",  X86_COND_L>;  // signed less than
defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG  : SETCC<0x9F, "setg",  X86_COND_G>;  // signed greater than

// SALC is an undocumented instruction. Information for this instruction can be found
// here http://www.rcollins.org/secrets/opcodes/SALC.html
// Set AL if carry.
let Uses = [EFLAGS], Defs = [AL], SchedRW = [WriteALU] in {
  def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
}
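
A note on the record names these multiclasses generate: inside CMOV the defs are written as "def NAME#16rr" and so on, so defm CMOVE : CMOV<0x44, "cmove", WriteCMOV, X86_COND_E>; yields CMOVE16rr/CMOVE32rr/CMOVE64rr plus the rm counterparts, while defm SETE : SETCC<0x94, "sete", X86_COND_E>; yields SETEr and SETEm. A rough sketch of the SETE expansion (illustration only, outer let elided; not from the committed file):

// What defm SETE : SETCC<0x94, "sete", X86_COND_E>; expands to:
def SETEr : I<0x94, MRMXr, (outs GR8:$dst), (ins), "sete\t$dst",
              [(set GR8:$dst, (X86setcc X86_COND_E, EFLAGS))]>,
              TB, Sched<[WriteSETCC]>;
def SETEm : I<0x94, MRMXm, (outs), (ins i8mem:$dst), "sete\t$dst",
              [(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
              TB, Sched<[WriteSETCCStore]>;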

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrCompiler.td | 2103 lines added (vendored, new file; diff suppressed because it is too large)

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrControl.td | 413 lines added (vendored, new file)
@@ -0,0 +1,413 @@
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 jump, return, call, and related instructions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Control Flow Instructions.
//

// Return instructions.
//
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
    hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
  def RETL   : I   <0xC3, RawFrm, (outs), (ins variable_ops),
                    "ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
  def RETQ   : I   <0xC3, RawFrm, (outs), (ins variable_ops),
                    "ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
  def RETW   : I   <0xC3, RawFrm, (outs), (ins),
                    "ret{w}", []>, OpSize16;
  def RETIL  : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
                    "ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
  def RETIQ  : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
                    "ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
  def RETIW  : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
                    "ret{w}\t$amt", []>, OpSize16;
  def LRETL  : I   <0xCB, RawFrm, (outs), (ins),
                    "{l}ret{l|f}", []>, OpSize32;
  def LRETQ  : RI  <0xCB, RawFrm, (outs), (ins),
                    "{l}ret{|f}q", []>, Requires<[In64BitMode]>;
  def LRETW  : I   <0xCB, RawFrm, (outs), (ins),
                    "{l}ret{w|f}", []>, OpSize16;
  def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
                    "{l}ret{l|f}\t$amt", []>, OpSize32;
  def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
                    "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
  def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
                    "{l}ret{w|f}\t$amt", []>, OpSize16;

  // The machine return from interrupt instruction. Sometimes we need to
  // perform a post-epilogue stack adjustment; codegen emits the pseudo form
  // which expands to include an SP adjustment if necessary.
  def IRET16 : I   <0xcf, RawFrm, (outs), (ins), "iret{w}", []>,
               OpSize16;
  def IRET32 : I   <0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>, OpSize32;
  def IRET64 : RI  <0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>;
  // let isCodeGenOnly = 1 in
  // def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
  // def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
}

// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
  def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
                       "jmp\t$dst", [(br bb:$dst)]>;
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
                          "jmp\t$dst", []>, OpSize16;
    def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
                          "jmp\t$dst", []>, OpSize32;
  }
}

// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
  multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
    def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
                       [(X86brcond bb:$dst, Cond, EFLAGS)]>;
    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
      def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
                         []>, OpSize16, TB;
      def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
                         []>, TB, OpSize32;
    }
  }
}

defm JO  : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB  : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE  : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
defm JA  : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
defm JS  : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
defm JP  : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
defm JL  : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG  : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;

// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
  // These are the 32-bit versions of this instruction for the asmparser. In
  // 32-bit mode, the address size prefix is jcxz and the unprefixed version is
  // jecxz.
  let Uses = [CX] in
    def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
                        "jcxz\t$dst", []>, AdSize16, Requires<[Not64BitMode]>;
  let Uses = [ECX] in
    def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
                         "jecxz\t$dst", []>, AdSize32;

  let Uses = [RCX] in
    def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
                         "jrcxz\t$dst", []>, AdSize64, Requires<[In64BitMode]>;
}

// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
  def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst",
                 [(brind GR16:$dst)]>, Requires<[Not64BitMode]>,
                 OpSize16, Sched<[WriteJump]>;
  def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst",
                 [(brind (loadi16 addr:$dst))]>, Requires<[Not64BitMode]>,
                 OpSize16, Sched<[WriteJumpLd]>;

  def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
                 [(brind GR32:$dst)]>, Requires<[Not64BitMode]>,
                 OpSize32, Sched<[WriteJump]>;
  def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
                 [(brind (loadi32 addr:$dst))]>, Requires<[Not64BitMode]>,
                 OpSize32, Sched<[WriteJumpLd]>;

  def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
                 [(brind GR64:$dst)]>, Requires<[In64BitMode]>,
                 Sched<[WriteJump]>;
  def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
                 [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>,
                 Sched<[WriteJumpLd]>;

  // Non-tracking jumps for IBT, use with caution.
  let isCodeGenOnly = 1 in {
    def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst",
                      [(X86NoTrackBrind GR16 : $dst)]>, Requires<[Not64BitMode]>,
                      OpSize16, Sched<[WriteJump]>, NOTRACK;

    def JMP16m_NT : I<0xFF, MRM4m, (outs), (ins i16mem : $dst), "jmp{w}\t{*}$dst",
                      [(X86NoTrackBrind (loadi16 addr : $dst))]>,
                      Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>,
                      NOTRACK;

    def JMP32r_NT : I<0xFF, MRM4r, (outs), (ins GR32 : $dst), "jmp{l}\t{*}$dst",
                      [(X86NoTrackBrind GR32 : $dst)]>, Requires<[Not64BitMode]>,
                      OpSize32, Sched<[WriteJump]>, NOTRACK;
    def JMP32m_NT : I<0xFF, MRM4m, (outs), (ins i32mem : $dst), "jmp{l}\t{*}$dst",
                      [(X86NoTrackBrind (loadi32 addr : $dst))]>,
                      Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>,
                      NOTRACK;

    def JMP64r_NT : I<0xFF, MRM4r, (outs), (ins GR64 : $dst), "jmp{q}\t{*}$dst",
                      [(X86NoTrackBrind GR64 : $dst)]>, Requires<[In64BitMode]>,
                      Sched<[WriteJump]>, NOTRACK;
    def JMP64m_NT : I<0xFF, MRM4m, (outs), (ins i64mem : $dst), "jmp{q}\t{*}$dst",
                      [(X86NoTrackBrind(loadi64 addr : $dst))]>,
                      Requires<[In64BitMode]>, Sched<[WriteJumpLd]>, NOTRACK;
  }

  let Predicates = [Not64BitMode], AsmVariantName = "att" in {
    def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
                           (ins i16imm:$off, i16imm:$seg),
                           "ljmp{w}\t$seg : $off", []>,
                           OpSize16, Sched<[WriteJump]>;
    def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
                           (ins i32imm:$off, i16imm:$seg),
                           "ljmp{l}\t$seg : $off", []>,
                           OpSize32, Sched<[WriteJump]>;
  }
  def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
                    "ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;

  let AsmVariantName = "att" in
    def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
                      "ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
  def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
                    "{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}

// Loop instructions
let SchedRW = [WriteJump] in {
  def LOOP   : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
  def LOOPE  : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
  def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
}

//===----------------------------------------------------------------------===//
//  Call Instructions...
//
let isCall = 1 in
  // All calls clobber the non-callee saved registers. ESP is marked as
  // a use to prevent stack-pointer assignments that appear immediately
  // before calls from potentially appearing dead. Uses for argument
  // registers are added manually.
  let Uses = [ESP, SSP] in {
    def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
                                (outs), (ins i32imm_pcrel:$dst),
                                "call{l}\t$dst", []>, OpSize32,
                                Requires<[Not64BitMode]>, Sched<[WriteJump]>;
    let hasSideEffects = 0 in
      def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
                                  (outs), (ins i16imm_pcrel:$dst),
                                  "call{w}\t$dst", []>, OpSize16,
                                  Sched<[WriteJump]>;
    def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
                    "call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
                    OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
    def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst),
                    "call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))]>,
                    OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
                    Sched<[WriteJumpLd]>;
    def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
                    "call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
                    Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
    def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
                    "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
                    OpSize32,
                    Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
                    Sched<[WriteJumpLd]>;

    // Non-tracking calls for IBT, use with caution.
    let isCodeGenOnly = 1 in {
      def CALL16r_NT : I<0xFF, MRM2r, (outs), (ins GR16 : $dst),
                         "call{w}\t{*}$dst",[(X86NoTrackCall GR16 : $dst)]>,
                         OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
      def CALL16m_NT : I<0xFF, MRM2m, (outs), (ins i16mem : $dst),
                         "call{w}\t{*}$dst",[(X86NoTrackCall(loadi16 addr : $dst))]>,
                         OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
                         Sched<[WriteJumpLd]>, NOTRACK;
      def CALL32r_NT : I<0xFF, MRM2r, (outs), (ins GR32 : $dst),
                         "call{l}\t{*}$dst",[(X86NoTrackCall GR32 : $dst)]>,
                         OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
      def CALL32m_NT : I<0xFF, MRM2m, (outs), (ins i32mem : $dst),
                         "call{l}\t{*}$dst",[(X86NoTrackCall(loadi32 addr : $dst))]>,
                         OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall]>,
                         Sched<[WriteJumpLd]>, NOTRACK;
    }

    let Predicates = [Not64BitMode], AsmVariantName = "att" in {
      def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
                              (ins i16imm:$off, i16imm:$seg),
                              "lcall{w}\t$seg : $off", []>,
                              OpSize16, Sched<[WriteJump]>;
      def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
                              (ins i32imm:$off, i16imm:$seg),
                              "lcall{l}\t$seg : $off", []>,
                              OpSize32, Sched<[WriteJump]>;
    }

    def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
                       "lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
    def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
                       "{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
  }


/*
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
    isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
  let Uses = [ESP, SSP] in {
  def TCRETURNdi : PseudoI<(outs),
                           (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable;
  def TCRETURNri : PseudoI<(outs),
                           (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
  let mayLoad = 1 in
  def TCRETURNmi : PseudoI<(outs),
                           (ins i32mem_TC:$dst, i32imm:$offset), []>;

  // FIXME: These should be pseudo instructions that are lowered when going to
  // mcinst.
  def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
                           (ins i32imm_pcrel:$dst), "jmp\t$dst", []>;

  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
                   "", []>;  // FIXME: Remove encoding when JIT is dead.
  let mayLoad = 1 in
  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
                   "jmp{l}\t{*}$dst", []>;
}

// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
    isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
  let Uses = [ESP, EFLAGS, SSP] in {
  def TCRETURNdicc : PseudoI<(outs),
                             (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;

  // This gets substituted to a conditional jump instruction in MC lowering.
  def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
                              (ins i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/


//===----------------------------------------------------------------------===//
//  Call Instructions...
//

// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
  // NOTE: this pattern doesn't match "X86call imm", because we do not know
  // that the offset between an arbitrary immediate and the call will fit in
  // the 32-bit pcrel field that we have.
  def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
                                (outs), (ins i64i32imm_pcrel:$dst),
                                "call{q}\t$dst", []>, OpSize32,
                                Requires<[In64BitMode]>;
  def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
                  "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
                  Requires<[In64BitMode,NotUseRetpoline]>;
  def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                  "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
                  Requires<[In64BitMode,FavorMemIndirectCall,
                            NotUseRetpoline]>;

  // Non-tracking calls for IBT, use with caution.
  let isCodeGenOnly = 1 in {
    def CALL64r_NT : I<0xFF, MRM2r, (outs), (ins GR64 : $dst),
                       "call{q}\t{*}$dst",[(X86NoTrackCall GR64 : $dst)]>,
                       Requires<[In64BitMode]>, NOTRACK;
    def CALL64m_NT : I<0xFF, MRM2m, (outs), (ins i64mem : $dst),
                       "call{q}\t{*}$dst",
                       [(X86NoTrackCall(loadi64 addr : $dst))]>,
                       Requires<[In64BitMode,FavorMemIndirectCall]>, NOTRACK;
  }

  def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
                     "lcall{q}\t{*}$dst", []>;
}

/*
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
    isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
  def TCRETURNdi64   : PseudoI<(outs),
                               (ins i64i32imm_pcrel:$dst, i32imm:$offset),
                               []>;
  def TCRETURNri64   : PseudoI<(outs),
                               (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
  let mayLoad = 1 in
  def TCRETURNmi64   : PseudoI<(outs),
                               (ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable;

  def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
                             "jmp\t$dst", []>;

  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
                     "jmp{q}\t{*}$dst", []>;

  let mayLoad = 1 in
  def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
                     "jmp{q}\t{*}$dst", []>;

  // Win64 wants indirect jumps leaving the function to have a REX_W prefix.
  let hasREX_WPrefix = 1 in {
    def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
                           "rex64 jmp{q}\t{*}$dst", []>;

    let mayLoad = 1 in
    def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
                           "rex64 jmp{q}\t{*}$dst", []>;
  }
}

let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
    Uses = [RSP, SSP],
    usesCustomInserter = 1,
    SchedRW = [WriteJump] in {
  def RETPOLINE_CALL32 :
    PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
    Requires<[Not64BitMode,UseRetpoline]>;

  def RETPOLINE_CALL64 :
    PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
    Requires<[In64BitMode,UseRetpoline]>;

  // Retpoline variant of indirect tail calls.
  let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
    def RETPOLINE_TCRETURN64 :
      PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
    def RETPOLINE_TCRETURN32 :
      PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
  }
}

// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
    isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
  let Uses = [RSP, EFLAGS, SSP] in {
  def TCRETURNdi64cc : PseudoI<(outs),
                               (ins i64i32imm_pcrel:$dst, i32imm:$offset,
                                i32imm:$cond), []>;

  // This gets substituted to a conditional jump instruction in MC lowering.
  def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
                                (ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/
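
The NAME concatenation in ICBr works the same way as in the CMOV/SETCC multiclasses: defm JE : ICBr<0x74, 0x84, "je\t$dst", X86_COND_E>; creates the three branch encodings JE_1 (rel8), JE_2 (rel16) and JE_4 (rel32). Roughly (illustration only; not from the committed file):

// Expansion of defm JE : ICBr<0x74, 0x84, "je\t$dst", X86_COND_E>;
def JE_1 : Ii8PCRel<0x74, RawFrm, (outs), (ins brtarget8:$dst), "je\t$dst",
                    [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>;
def JE_2 : Ii16PCRel<0x84, RawFrm, (outs), (ins brtarget16:$dst), "je\t$dst",
                     []>, OpSize16, TB;
def JE_4 : Ii32PCRel<0x84, RawFrm, (outs), (ins brtarget32:$dst), "je\t$dst",
                     []>, TB, OpSize32;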

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrExtension.td | 204 lines added (vendored, new file)
@@ -0,0 +1,204 @@
//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
//
//===----------------------------------------------------------------------===//

let hasSideEffects = 0 in {
  let Defs = [AX], Uses = [AL] in // AX = signext(AL)
    def CBW : I<0x98, RawFrm, (outs), (ins),
                "{cbtw|cbw}", []>, OpSize16, Sched<[WriteALU]>;
  let Defs = [EAX], Uses = [AX] in // EAX = signext(AX)
    def CWDE : I<0x98, RawFrm, (outs), (ins),
                 "{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>;

  let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX)
    def CWD : I<0x99, RawFrm, (outs), (ins),
                "{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>;
  let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX)
    def CDQ : I<0x99, RawFrm, (outs), (ins),
                "{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>;


  let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
    def CDQE : RI<0x98, RawFrm, (outs), (ins),
                  "{cltq|cdqe}", []>, Sched<[WriteALU]>;

  let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
    def CQO : RI<0x99, RawFrm, (outs), (ins),
                 "{cqto|cqo}", []>, Sched<[WriteALU]>;
}

// Sign/Zero extenders
let hasSideEffects = 0 in {
  def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
                     "movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
                     TB, OpSize16, Sched<[WriteALU]>;
  let mayLoad = 1 in
    def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
                       "movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
                       TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (sext GR8:$src))]>, TB,
                   OpSize32, Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
                   "movs{bl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB,
                   OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (sext GR16:$src))]>, TB,
                   OpSize32, Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                   "movs{wl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (sextloadi32i16 addr:$src))]>,
                   OpSize32, TB, Sched<[WriteALULd]>;

let hasSideEffects = 0 in {
  def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
                     "movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
                     TB, OpSize16, Sched<[WriteALU]>;
  let mayLoad = 1 in
    def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
                       "movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
                       TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (zext GR8:$src))]>, TB,
                   OpSize32, Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
                   "movz{bl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB,
                   OpSize32, Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (zext GR16:$src))]>, TB,
                   OpSize32, Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                   "movz{wl|x}\t{$src, $dst|$dst, $src}",
                   [(set GR32:$dst, (zextloadi32i16 addr:$src))]>,
                   TB, OpSize32, Sched<[WriteALULd]>;

// These instructions exist as a consequence of operand size prefix having
// control of the destination size, but not the input size. Only support them
// for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
  def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "movs{ww|x}\t{$src, $dst|$dst, $src}",
                     []>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
  def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "movz{ww|x}\t{$src, $dst|$dst, $src}",
                     []>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
  let mayLoad = 1 in {
    def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                       "movs{ww|x}\t{$src, $dst|$dst, $src}",
                       []>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable;
    def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                       "movz{ww|x}\t{$src, $dst|$dst, $src}",
                       []>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable;
  } // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0

// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
let hasSideEffects = 0, isCodeGenOnly = 1 in {
  def MOVZX32rr8_NOREX : I<0xB6, MRMSrcReg,
                           (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
                           "movz{bl|x}\t{$src, $dst|$dst, $src}",
                           []>, TB, OpSize32, Sched<[WriteALU]>;
  let mayLoad = 1 in
    def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem,
                             (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
                             "movz{bl|x}\t{$src, $dst|$dst, $src}",
                             []>, TB, OpSize32, Sched<[WriteALULd]>;

  def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg,
                           (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
                           "movs{bl|x}\t{$src, $dst|$dst, $src}",
                           []>, TB, OpSize32, Sched<[WriteALU]>;
  let mayLoad = 1 in
    def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem,
                             (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
                             "movs{bl|x}\t{$src, $dst|$dst, $src}",
                             []>, TB, OpSize32, Sched<[WriteALULd]>;
}

// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
// operand that can never access an h register. If support for h registers
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, (sext GR8:$src))]>, TB,
                    Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
                    "movs{bq|x}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, (sextloadi64i8 addr:$src))]>,
                    TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, (sext GR16:$src))]>, TB,
                    Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                    "movs{wq|x}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, (sextloadi64i16 addr:$src))]>,
                    TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, (sext GR32:$src))]>,
                    Sched<[WriteALU]>, Requires<[In64BitMode]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                    "movs{lq|xd}\t{$src, $dst|$dst, $src}",
                    [(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
                    Sched<[WriteALULd]>, Requires<[In64BitMode]>;

// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
  def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
                      "movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
                      TB, Sched<[WriteALU]>;
  let mayLoad = 1 in
    def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
                        "movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
                        TB, Sched<[WriteALULd]>;
  def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
                       "movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
                       TB, Sched<[WriteALU]>;
  let mayLoad = 1 in
    def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                         "movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
                         TB, Sched<[WriteALULd]>;
}

// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
def : Pat<(i64 (zext GR8:$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(zextloadi64i8 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

def : Pat<(i64 (zext GR16:$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def : Pat<(zextloadi64i16 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;

// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
// SUBREG_TO_REG to utilize implicit zero-extension, however this isn't possible
// when the 32-bit value is defined by a truncate or is copied from something
// where the high bits aren't necessarily all zero. In such cases, we fall back
// to these explicit zext instructions.
def : Pat<(i64 (zext GR32:$src)),
          (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
def : Pat<(i64 (zextloadi64i32 addr:$src)),
          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
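
The SUBREG_TO_REG patterns above lean on an architectural guarantee: on x86-64, any instruction that writes a 32-bit register implicitly zeroes bits 63:32 of the containing 64-bit register, so a zero extension to 64 bits never needs a 64-bit instruction. An annotated reading of the first pattern (illustration only; not from the committed file):

// (i64 (zext GR8:$src))                       ; the DAG to match
//   -> (SUBREG_TO_REG (i64 0),                ; upper 32 bits known to be zero
//                     (MOVZX32rr8 GR8:$src),  ; movzbl: 8-bit -> 32-bit zext
//                     sub_32bit)              ; result lives in the low subreg
def : Pat<(i64 (zext GR8:$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;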

thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrFMA.td | 636 lines added (vendored, new file)
@@ -0,0 +1,636 @@
|
||||
//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file describes FMA (Fused Multiply-Add) instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// For all FMA opcodes declared in fma3p_rm_* and fma3s_rm_* multiclasses
|
||||
// defined below, both the register and memory variants are commutable.
|
||||
// For the register form the commutable operands are 1, 2 and 3.
|
||||
// For the memory variant the folded operand must be in 3. Thus,
|
||||
// in that case, only the operands 1 and 2 can be swapped.
|
||||
// Commuting some of operands may require the opcode change.
|
||||
// FMA*213*:
|
||||
// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
|
||||
// operands 1 and 3 (register forms only): *213* --> *231*;
|
||||
// operands 2 and 3 (register forms only): *213* --> *132*.
|
||||
// FMA*132*:
|
||||
// operands 1 and 2 (memory & register forms): *132* --> *231*;
|
||||
// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
|
||||
// operands 2 and 3 (register forms only): *132* --> *213*.
|
||||
// FMA*231*:
|
||||
// operands 1 and 2 (memory & register forms): *231* --> *132*;
|
||||
// operands 1 and 3 (register forms only): *231* --> *213*;
|
||||
// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
|
||||
|

multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
                        SDNode Op, X86FoldableSchedWrite sched> {
  def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>,
           Sched<[sched]>;

  let mayLoad = 1 in
  def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, x86memop:$src3),
               !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
                                      (MemFrag addr:$src3))))]>,
           Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
                        SDNode Op, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               []>, Sched<[sched]>;

  let mayLoad = 1 in
  def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, x86memop:$src3),
               !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
                                      RC:$src1)))]>,
           Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
                        ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
                        SDNode Op, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               []>, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  let mayLoad = 1 in
  def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, x86memop:$src3),
               !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
               [(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
                                      RC:$src2)))]>,
           Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpcodeStr, string PackTy, string Suff,
                       PatFrag MemFrag128, PatFrag MemFrag256,
                       SDNode Op, ValueType OpTy128, ValueType OpTy256,
                       X86SchedWriteWidths sched> {
  defm NAME#213#Suff : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
                                    VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
  defm NAME#231#Suff : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
                                    VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
  defm NAME#132#Suff : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
                                    VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;

  defm NAME#213#Suff#Y : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
                                      VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
                         VEX_L;
  defm NAME#231#Suff#Y : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
                                      VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
                         VEX_L;
  defm NAME#132#Suff#Y : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
                                      VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
                         VEX_L;
}

// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
  defm VFMADD    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
                               loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
                               SchedWriteFMA>;
  defm VFMSUB    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
                               loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
                               SchedWriteFMA>;
  defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
                               loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32,
                               SchedWriteFMA>;
  defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS",
                               loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32,
                               SchedWriteFMA>;
}

let ExeDomain = SSEPackedDouble in {
  defm VFMADD    : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
                               loadv2f64, loadv4f64, X86Fmadd, v2f64,
                               v4f64, SchedWriteFMA>, VEX_W;
  defm VFMSUB    : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
                               loadv2f64, loadv4f64, X86Fmsub, v2f64,
                               v4f64, SchedWriteFMA>, VEX_W;
  defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
                               loadv2f64, loadv4f64, X86Fmaddsub,
                               v2f64, v4f64, SchedWriteFMA>, VEX_W;
  defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
                               loadv2f64, loadv4f64, X86Fmsubadd,
                               v2f64, v4f64, SchedWriteFMA>, VEX_W;
}

// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
  defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
                             loadv8f32, X86Fnmadd, v4f32, v8f32, SchedWriteFMA>;
  defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
                             loadv8f32, X86Fnmsub, v4f32, v8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
  defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
                             loadv4f64, X86Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
  defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
                             loadv4f64, X86Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
}

// All source register operands of FMA opcodes defined in the fma3s_rm
// multiclass can be commuted. In many cases such a commute transformation
// requires an opcode adjustment; for example, commuting operands 1 and 2 in
// the FMA*132 form requires an opcode change to FMA*231:
//    FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
//    -->
//    FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
// Please see the more detailed comment at the very beginning of the section
// defining the FMA3 opcodes above.
multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
                        X86MemOperand x86memop, RegisterClass RC,
                        SDPatternOperator OpNode,
                        X86FoldableSchedWrite sched> {
  def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>,
            Sched<[sched]>;

  let mayLoad = 1 in
  def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, x86memop:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                [(set RC:$dst,
                      (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
            Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
                        X86MemOperand x86memop, RegisterClass RC,
                        SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                []>, Sched<[sched]>;

  let mayLoad = 1 in
  def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, x86memop:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                [(set RC:$dst,
                      (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
            Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
                        X86MemOperand x86memop, RegisterClass RC,
                        SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, RC:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                []>, Sched<[sched]>;

  // The pattern is in 312 order so that the load is in a different place from
  // the 213 and 231 patterns; this helps tablegen's duplicate pattern
  // detection.
  let mayLoad = 1 in
  def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
                (ins RC:$src1, RC:$src2, x86memop:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                [(set RC:$dst,
                      (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
            Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                       string OpStr, string PackTy, string Suff,
                       SDNode OpNode, RegisterClass RC,
                       X86MemOperand x86memop, X86FoldableSchedWrite sched> {
  defm NAME#213#Suff : fma3s_rm_213<opc213, !strconcat(OpStr, "213", PackTy),
                                    x86memop, RC, OpNode, sched>;
  defm NAME#231#Suff : fma3s_rm_231<opc231, !strconcat(OpStr, "231", PackTy),
                                    x86memop, RC, OpNode, sched>;
  defm NAME#132#Suff : fma3s_rm_132<opc132, !strconcat(OpStr, "132", PackTy),
                                    x86memop, RC, OpNode, sched>;
}

// These FMA*_Int instructions are defined specially for being used when
// the scalar FMA intrinsics are lowered to machine instructions, and in that
// sense, they are similar to existing ADD*_Int, SUB*_Int, MUL*_Int, etc.
// instructions.
//
// All of the FMA*_Int opcodes are defined as commutable here.
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
// and the corresponding optimizations have been developed.
// Commuting the 1st operand of FMA*_Int requires some additional analysis:
// the commute optimization is legal only if all users of FMA*_Int use only
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may not be implemented yet, we allow the routines doing the actual commute
// transformation to decide whether an instruction is commutable or not.
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
    hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                        Operand memopr, RegisterClass RC,
                        X86FoldableSchedWrite sched> {
  def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
                        (ins RC:$src1, RC:$src2, RC:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        []>, Sched<[sched]>;

  let mayLoad = 1 in
  def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
                        (ins RC:$src1, RC:$src2, memopr:$src3),
                        !strconcat(OpcodeStr,
                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                        []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}

// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
// operands of the FMA 213 form.
// The FMA 231 form can be obtained only by commuting the 1st operand of the
// 213 or 132 forms, which is possible only after special analysis of all uses
// of the initial instruction. Such analysis does not exist yet, so the 231
// form of the FMA*_Int instructions is introduced under an optimistic
// assumption that such analysis will be implemented eventually.
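// As a concrete illustration of how the multiclass below names its products,
// here is a hypothetical, self-contained TableGen stub (checkable on its own
// with llvm-tblgen) showing how the NAME/Suff string pasting expands one defm
// into the three *_Int form records:
class IntStub<string asm> { string AsmName = asm; }
multiclass IntStubForms<string OpStr, string PackTy, string Suff> {
  def NAME#132#Suff#"r_Int" : IntStub<OpStr#"132"#PackTy>;
  def NAME#213#Suff#"r_Int" : IntStub<OpStr#"213"#PackTy>;
  def NAME#231#Suff#"r_Int" : IntStub<OpStr#"231"#PackTy>;
}
// Expands to VFMADDX132SSr_Int, VFMADDX213SSr_Int and VFMADDX231SSr_Int
// (VFMADDX is a placeholder name, chosen to avoid clashing with real defs):
defm VFMADDX : IntStubForms<"vfmadd", "ss", "SS">;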
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                           string OpStr, string PackTy, string Suff,
                           RegisterClass RC, Operand memop,
                           X86FoldableSchedWrite sched> {
  defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
                                    memop, RC, sched>;
  defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
                                    memop, RC, sched>;
  defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
                                    memop, RC, sched>;
}

multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
                 string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = SSEPackedSingle in
  defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
                          FR32, f32mem, sched>,
              fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS",
                              VR128, ssmem, sched>;

  let ExeDomain = SSEPackedDouble in
  defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
                          FR64, f64mem, sched>,
              fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
                              VR128, sdmem, sched>, VEX_W;
}

defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
                    SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
                    SchedWriteFMA.Scl>, VEX_LIG;

defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd,
                     SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub,
                     SchedWriteFMA.Scl>, VEX_LIG;

multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
                               SDNode Move, ValueType VT, ValueType EltVT,
                               RegisterClass RC, PatFrag mem_frag> {
  let Predicates = [HasFMA, NoAVX512] in {
    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
                  (Op RC:$src2,
                      (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
                      RC:$src3))))),
              (!cast<Instruction>(Prefix#"213"#Suffix#"r_Int")
               VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
               (VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;

    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
                  (Op RC:$src2, RC:$src3,
                      (EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
              (!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
               VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
               (VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;

    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
                  (Op RC:$src2,
                      (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
                      (mem_frag addr:$src3)))))),
              (!cast<Instruction>(Prefix#"213"#Suffix#"m_Int")
               VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
               addr:$src3)>;

    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
                  (Op (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
                      (mem_frag addr:$src3), RC:$src2))))),
              (!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
               VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
               addr:$src3)>;

    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
                  (Op RC:$src2, (mem_frag addr:$src3),
                      (EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
              (!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
               VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
               addr:$src3)>;
  }
}

defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;

defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;

//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//

multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
                 X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
                 PatFrag mem_frag, X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rr : FMA4S<opc, MRMSrcRegOp4, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, RC:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set RC:$dst,
                       (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG,
             Sched<[sched]>;
  def rm : FMA4S<opc, MRMSrcMemOp4, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, x86memop:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
                                        (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
             Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
  def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2, RC:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set RC:$dst,
                       (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
             Sched<[sched.Folded, ReadAfterLd,
                    // x86memop:$src2
                    ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                    ReadDefault,
                    // RC:$src3
                    ReadAfterLd]>;
  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, RC:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
               VEX_LIG, FoldGenData<NAME#rr>, Sched<[sched]>;
}

multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                     ValueType VT, X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2, VR128:$src3),
                           !strconcat(OpcodeStr,
                                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                           []>, VEX_W, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
                           (ins VR128:$src1, VR128:$src2, memop:$src3),
                           !strconcat(OpcodeStr,
                                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                           []>, VEX_W, VEX_LIG,
                 Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
    let mayLoad = 1 in
    def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
                           (ins VR128:$src1, memop:$src2, VR128:$src3),
                           !strconcat(OpcodeStr,
                                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                           []>,
                 VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
                                 // memop:$src2
                                 ReadDefault, ReadDefault, ReadDefault,
                                 ReadDefault, ReadDefault,
                                 // VR128:$src3
                                 ReadAfterLd]>;
    def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
                               (ins VR128:$src1, VR128:$src2, VR128:$src3),
                               !strconcat(OpcodeStr,
                                          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                               []>, VEX_LIG, FoldGenData<NAME#rr_Int>, Sched<[sched]>;
  } // isCodeGenOnly = 1
}

multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                 ValueType OpVT128, ValueType OpVT256,
                 PatFrag ld_frag128, PatFrag ld_frag256,
                 X86SchedWriteWidths sched> {
  let isCommutable = 1 in
  def rr : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst,
                      (OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
            VEX_W, Sched<[sched.XMM]>;
  def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, f128mem:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
                                          (ld_frag128 addr:$src3)))]>, VEX_W,
            Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
  def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, f128mem:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                           "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst,
                      (OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
            Sched<[sched.XMM.Folded, ReadAfterLd,
                   // f128mem:$src2
                   ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                   ReadDefault,
                   // VR128:$src3
                   ReadAfterLd]>;
  let isCommutable = 1 in
  def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
                 (ins VR256:$src1, VR256:$src2, VR256:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set VR256:$dst,
                       (OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
             VEX_W, VEX_L, Sched<[sched.YMM]>;
  def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
                 (ins VR256:$src1, VR256:$src2, f256mem:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
                                           (ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
             Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
  def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
                 (ins VR256:$src1, f256mem:$src2, VR256:$src3),
                 !strconcat(OpcodeStr,
                            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                 [(set VR256:$dst, (OpNode VR256:$src1,
                                           (ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
             Sched<[sched.YMM.Folded, ReadAfterLd,
                    // f256mem:$src2
                    ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                    ReadDefault,
                    // VR256:$src3
                    ReadAfterLd]>;
  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
    def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, VR128:$src3),
                      !strconcat(OpcodeStr,
                                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
                 Sched<[sched.XMM]>, FoldGenData<NAME#rr>;
    def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
                       (ins VR256:$src1, VR256:$src2, VR256:$src3),
                       !strconcat(OpcodeStr,
                                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
                  VEX_L, Sched<[sched.YMM]>, FoldGenData<NAME#Yrr>;
  } // isCodeGenOnly = 1
}

let ExeDomain = SSEPackedSingle in {
  // Scalar Instructions
  defm VFMADDSS4  : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
  defm VFMSUBSS4  : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
  defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
                          X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
                    fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
  defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
                          X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
                    fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
  // Packed Instructions
  defm VFMADDPS4    : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
                            loadv4f32, loadv8f32, SchedWriteFMA>;
  defm VFMSUBPS4    : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
                            loadv4f32, loadv8f32, SchedWriteFMA>;
  defm VFNMADDPS4   : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
                            loadv4f32, loadv8f32, SchedWriteFMA>;
  defm VFNMSUBPS4   : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
                            loadv4f32, loadv8f32, SchedWriteFMA>;
  defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
                            loadv4f32, loadv8f32, SchedWriteFMA>;
  defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
                            loadv4f32, loadv8f32, SchedWriteFMA>;
}

let ExeDomain = SSEPackedDouble in {
  // Scalar Instructions
  defm VFMADDSD4  : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
  defm VFMSUBSD4  : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
  defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
                          X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
                    fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
  defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
                          X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
                    fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
  // Packed Instructions
  defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
                            loadv2f64, loadv4f64, SchedWriteFMA>;
  defm VFMSUBPD4    : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
                            loadv2f64, loadv4f64, SchedWriteFMA>;
  defm VFNMADDPD4   : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
                            loadv2f64, loadv4f64, SchedWriteFMA>;
  defm VFNMSUBPD4   : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
                            loadv2f64, loadv4f64, SchedWriteFMA>;
  defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
                            loadv2f64, loadv4f64, SchedWriteFMA>;
  defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
                            loadv2f64, loadv4f64, SchedWriteFMA>;
}

multiclass scalar_fma4_patterns<SDNode Op, string Name,
                                ValueType VT, ValueType EltVT,
                                RegisterClass RC, PatFrag mem_frag> {
  let Predicates = [HasFMA4] in {
    def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
                  (Op RC:$src1, RC:$src2, RC:$src3))))),
              (!cast<Instruction>(Name#"rr_Int")
               (VT (COPY_TO_REGCLASS RC:$src1, VR128)),
               (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
               (VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;

    def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
                  (Op RC:$src1, RC:$src2,
                      (mem_frag addr:$src3)))))),
              (!cast<Instruction>(Name#"rm_Int")
               (VT (COPY_TO_REGCLASS RC:$src1, VR128)),
               (VT (COPY_TO_REGCLASS RC:$src2, VR128)), addr:$src3)>;

    def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
                  (Op RC:$src1, (mem_frag addr:$src2),
                      RC:$src3))))),
              (!cast<Instruction>(Name#"mr_Int")
               (VT (COPY_TO_REGCLASS RC:$src1, VR128)), addr:$src2,
               (VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
  }
}

defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;

defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
748
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrFPStack.td
vendored
Normal file
@@ -0,0 +1,748 @@
//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTX86FpGet2    : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
                                           SDTCisVT<1, f80>]>;
def SDTX86Fld       : SDTypeProfile<1, 2, [SDTCisFP<0>,
                                           SDTCisPtrTy<1>,
                                           SDTCisVT<2, OtherVT>]>;
def SDTX86Fst       : SDTypeProfile<0, 3, [SDTCisFP<0>,
                                           SDTCisPtrTy<1>,
                                           SDTCisVT<2, OtherVT>]>;
def SDTX86Fild      : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
                                           SDTCisVT<2, OtherVT>]>;
def SDTX86Fnstsw    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem  : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;

def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;

def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst          : SDNode<"X86ISD::FST", SDTX86Fst,
                             [SDNPHasChain, SDNPInGlue, SDNPMayStore,
                              SDNPMemOperand]>;
def X86fild         : SDNode<"X86ISD::FILD", SDTX86Fild,
                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag     : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
                             [SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
                              SDNPMemOperand]>;
def X86fp_stsw      : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
                              SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//

def fpimm0 : FPImmLeaf<fAny, [{
  return Imm.isExactlyValue(+0.0);
}]>;

def fpimmneg0 : FPImmLeaf<fAny, [{
  return Imm.isExactlyValue(-0.0);
}]>;

def fpimm1 : FPImmLeaf<fAny, [{
  return Imm.isExactlyValue(+1.0);
}]>;

def fpimmneg1 : FPImmLeaf<fAny, [{
  return Imm.isExactlyValue(-1.0);
}]>;

/*
// Some 'special' instructions - expanded after instruction selection.
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
  def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
                                     [(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
  def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
                                     [(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
  def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
                                     [(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
  def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
                                     [(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
  def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
                                     [(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
  def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
                                     [(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
  def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
                                     [(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
  def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
                                     [(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
  def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
                                     [(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
*/

// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32",
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
// In some cases there are additional variants with a mixture of different
// register sizes.
// The fourth instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
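// To make the pseudo/real split above concrete, here is an illustrative,
// self-contained TableGen sketch (hypothetical stub classes, not the real
// FpI_/FPI definitions): the pseudo record carries only a selection pattern,
// while the real record carries only encoding and asm-printing information.
class PseudoStub<dag pat> { dag SelectionPattern = pat; }   // the "_Fp" side
class RealStub<bits<8> opc, string asm> {                   // the FPI side
  bits<8> Opcode    = opc;
  string  AsmString = asm;
}
def faddop;                                       // placeholder dag operator
def ADD_Fp_example : PseudoStub<(faddop 1, 2)>;   // picked by ISel
def ADD_F_example  : RealStub<0xD8, "fadd">;      // what the stackifier emits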

// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;

// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
  // Register op register -> register
  // These are separated out because they have no reversed form.
  def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
                     [(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
  def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
                     [(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
  def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
                   [(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
}
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
                    bit Forward = 1> {
let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m  : FpIf32<(outs RFP32:$dst),
                     (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
                     [!if(Forward,
                          (set RFP32:$dst,
                               (OpNode RFP32:$src1, (loadf32 addr:$src2))),
                          (set RFP32:$dst,
                               (OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m  : FpIf64<(outs RFP64:$dst),
                     (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
                     [!if(Forward,
                          (set RFP64:$dst,
                               (OpNode RFP64:$src1, (loadf64 addr:$src2))),
                          (set RFP64:$dst,
                               (OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32 : FpIf64<(outs RFP64:$dst),
                      (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
                      [!if(Forward,
                           (set RFP64:$dst,
                                (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
                           (set RFP64:$dst,
                                (OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32 : FpI_<(outs RFP80:$dst),
                    (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
                    [!if(Forward,
                         (set RFP80:$dst,
                              (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
                         (set RFP80:$dst,
                              (OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64 : FpI_<(outs RFP80:$dst),
                    (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
                    [!if(Forward,
                         (set RFP80:$dst,
                              (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
                         (set RFP80:$dst,
                              (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
def _F32m  : FPI<0xD8, fp, (outs), (ins f32mem:$src),
                 !strconcat("f", asmstring, "{s}\t$src")>;
def _F64m  : FPI<0xDC, fp, (outs), (ins f64mem:$src),
                 !strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
                       OneArgFPRW,
                       [!if(Forward,
                            (set RFP32:$dst,
                                 (OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
                            (set RFP32:$dst,
                                 (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
                       OneArgFPRW,
                       [!if(Forward,
                            (set RFP32:$dst,
                                 (OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
                            (set RFP32:$dst,
                                 (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
                       OneArgFPRW,
                       [!if(Forward,
                            (set RFP64:$dst,
                                 (OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
                            (set RFP64:$dst,
                                 (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
                       OneArgFPRW,
                       [!if(Forward,
                            (set RFP64:$dst,
                                 (OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
                            (set RFP64:$dst,
                                 (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
                     OneArgFPRW,
                     [!if(Forward,
                          (set RFP80:$dst,
                               (OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
                          (set RFP80:$dst,
                               (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
                     OneArgFPRW,
                     [!if(Forward,
                          (set RFP80:$dst,
                               (OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
                          (set RFP80:$dst,
                               (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
def _FI16m  : FPI<0xDE, fp, (outs), (ins i16mem:$src),
                  !strconcat("fi", asmstring, "{s}\t$src")>;
def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src),
                  !strconcat("fi", asmstring, "{l}\t$src")>;
} // mayLoad = 1, hasSideEffects = 1
}

let Defs = [FPSW] in {
// FPBinary_rr just defines pseudo-instructions, so there is no need to set
// scheduling resources for them.
let hasNoSchedulingInfo = 1 in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
defm DIV : FPBinary_rr<fdiv>;
}

// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
defm ADD  : FPBinary<fadd, MRM0m, "add">;
defm SUB  : FPBinary<fsub, MRM4m, "sub">;
defm SUBR : FPBinary<fsub, MRM5m, "subr", 0>;
}

let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}

let SchedRW = [WriteFDivLd] in {
defm DIV  : FPBinary<fdiv, MRM6m, "div">;
defm DIVR : FPBinary<fdiv, MRM7m, "divr", 0>;
}
} // Defs = [FPSW]

class FPST0rInst<Format fp, string asm>
  : FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
class FPrST0Inst<Format fp, string asm>
  : FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
class FPrST0PInst<Format fp, string asm>
  : FPI<0xDE, fp, (outs), (ins RST:$op), asm>;

// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
let SchedRW = [WriteFAdd] in {
def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t$op">;
def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t$op">;
def SUBR_FST0r  : FPST0rInst <MRM5r, "fsubr\t$op">;
def SUB_FrST0   : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
def SUB_FPrST0  : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
def SUB_FST0r   : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
} // SchedRW
let SchedRW = [WriteFCom] in {
def COM_FST0r   : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r  : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
let SchedRW = [WriteFMul] in {
def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t$op">;
def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t$op">;
} // SchedRW
let SchedRW = [WriteFDiv] in {
def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t$op">;
def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
def DIV_FST0r   : FPST0rInst <MRM6r, "fdiv\t$op">;
def DIVR_FrST0  : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
} // SchedRW

// Unary operations.
multiclass FPUnary<SDNode OpNode, Format fp, string asmstring> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
                   [(set RFP32:$dst, (OpNode RFP32:$src))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
                   [(set RFP64:$dst, (OpNode RFP64:$src))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
                 [(set RFP80:$dst, (OpNode RFP80:$src))]>;
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}

let Defs = [FPSW] in {

let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}

let SchedRW = [WriteFSqrt80] in
defm SQRT : FPUnary<fsqrt, MRM_FA, "fsqrt">;

let SchedRW = [WriteMicrocoded] in {
defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}

let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
} // hasSideEffects

def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Defs = [FPSW]

// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
let SchedRW = [WriteFComLd] in {
def FCOM32m  : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;

def FCOM64m  : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;

def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;

def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW

let SchedRW = [WriteMicrocoded] in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;

def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
def FSAVEm  : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;

def FBLDm  : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\ttbyte ptr $src">;
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\ttbyte ptr $dst">;
} // SchedRW

// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;

multiclass FPCMov<PatLeaf cc> {
  def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
                         CondMovFP,
                         [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
                                                    cc, EFLAGS))]>;
  def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
                         CondMovFP,
                         [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
                                                    cc, EFLAGS))]>;
  def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
                   CondMovFP,
                   [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
                                              cc, EFLAGS))]>,
              Requires<[HasCMov]>;
}

let Defs = [FPSW] in {
let SchedRW = [WriteFCMOV] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB  : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE  : FPCMov<X86_COND_E>;
defm CMOVP  : FPCMov<X86_COND_P>;
defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
} // Uses = [EFLAGS], Constraints = "$src1 = $dst"

let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F  : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
                   "fcmovb\t{$op, %st(0)|st(0), $op}">;
def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
                   "fcmovbe\t{$op, %st(0)|st(0), $op}">;
def CMOVE_F  : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
                   "fcmove\t{$op, %st(0)|st(0), $op}">;
def CMOVP_F  : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
                   "fcmovu\t{$op, %st(0)|st(0), $op}">;
def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
                   "fcmovnb\t{$op, %st(0)|st(0), $op}">;
def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
                   "fcmovnbe\t{$op, %st(0)|st(0), $op}">;
def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
                   "fcmovne\t{$op, %st(0)|st(0), $op}">;
def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
                   "fcmovnu\t{$op, %st(0)|st(0), $op}">;
} // Predicates = [HasCMov]
} // SchedRW

// Floating point loads & stores.
let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
                      [(set RFP32:$dst, (loadf32 addr:$src))]>;
let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
                      [(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
                    [(set RFP80:$dst, (loadf80 addr:$src))]>;
} // canFoldAsLoad
def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
                        [(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
                      [(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
                      [(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
                        [(set RFP32:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
                        [(set RFP32:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
                        [(set RFP32:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
                        [(set RFP64:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
                        [(set RFP64:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
                        [(set RFP64:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
                      [(set RFP80:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
                      [(set RFP80:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
                      [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
} // SchedRW

let SchedRW = [WriteStore] in {
def ST_Fp32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
                        [(store RFP32:$src, addr:$op)]>;
def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
                        [(truncstoref32 RFP64:$src, addr:$op)]>;
def ST_Fp64m   : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
                        [(store RFP64:$src, addr:$op)]>;
def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
                      [(truncstoref32 RFP80:$src, addr:$op)]>;
def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
                      [(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.

let mayStore = 1, hasSideEffects = 0 in {
def ST_FpP32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP64m   : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore

def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
                     [(store RFP80:$src, addr:$op)]>;

let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
} // SchedRW

let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m  : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m  : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m  : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
def ST_FP64m  : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
def ST_FP80m  : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
def IST_F16m  : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
def IST_F32m  : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}

// FISTTP requires SSE3 even though it's an FPStack op.
let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
                        [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
                        [(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
                        [(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
                        [(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
                        [(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
                        [(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
                        [(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
                        [(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
                        [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]

let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
}

// FP Stack manipulation instructions.
let SchedRW = [WriteMove] in {
def LD_Frr  : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
def ST_Frr  : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
def XCH_F   : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
}

// Floating point constant loads.
let isReMaterializable = 1, SchedRW = [WriteZero] in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
                      [(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
                      [(set RFP32:$dst, fpimm1)]>;
def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
                      [(set RFP64:$dst, fpimm0)]>;
def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
                      [(set RFP64:$dst, fpimm1)]>;
def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
                    [(set RFP80:$dst, fpimm0)]>;
def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
                    [(set RFP80:$dst, fpimm1)]>;
}

let SchedRW = [WriteFLD0] in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;

let SchedRW = [WriteFLD1] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;

let SchedRW = [WriteFLDC], Defs = [FPSW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI  : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
def FLDLG2 : I<0xD9, MRM_EC, (outs), (ins), "fldlg2", []>;
def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW

// Floating point compares.
let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                        [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
                        [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
                       [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
} // SchedRW
} // Defs = [FPSW]

let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
                        [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
                        [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
                      [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}

let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr   : FPI<0xDD, MRM4r,    // FPSW = cmp ST(0) with ST(i)
                    (outs), (ins RST:$reg), "fucom\t$reg">;
def UCOM_FPr  : FPI<0xDD, MRM5r,    // FPSW = cmp ST(0) with ST(i), pop
                    (outs), (ins RST:$reg), "fucomp\t$reg">;
def UCOM_FPPr : FPI<0xDA, MRM_E9,   // cmp ST(0) with ST(1), pop, pop
                    (outs), (ins), "fucompp">;
}

let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr  : FPI<0xDB, MRM5r,    // CC = cmp ST(0) with ST(i)
                    (outs), (ins RST:$reg), "fucomi\t$reg">;
def UCOM_FIPr : FPI<0xDF, MRM5r,    // CC = cmp ST(0) with ST(i), pop
                    (outs), (ins RST:$reg), "fucompi\t$reg">;
}

let Defs = [EFLAGS, FPSW] in {
def COM_FIr  : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
}
} // SchedRW

// Floating point flag ops.
|
||||
let SchedRW = [WriteALU] in {
|
||||
let Defs = [AX], Uses = [FPSW] in
|
||||
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
|
||||
(outs), (ins), "fnstsw\t{%ax|ax}",
|
||||
[(set AX, (X86fp_stsw FPSW))]>;
|
||||
let Defs = [FPSW] in
|
||||
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world
|
||||
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
|
||||
[(X86fp_cwd_get16 addr:$dst)]>;
|
||||
} // SchedRW
|
||||
let Defs = [FPSW], mayLoad = 1 in
|
||||
def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
|
||||
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
|
||||
Sched<[WriteLoad]>;
|
||||
|
||||
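
// Usage sketch (illustrative, not part of the original file): before the
// fcomi/fucomi forms above existed, FP compare results were read back
// through the status word, e.g.
//   fucomp %st(1)   ; compare ST(0) with ST(1), set C0..C3 in FPSW
//   fnstsw %ax      ; AX = FPSW              (FNSTSW16r above)
//   sahf            ; AH -> EFLAGS, then use an ordinary jcc
// FNSTCW16m/FLDCW16m likewise round-trip the control word through a 16-bit
// memory slot, typically to change the rounding mode.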
// FPU control instructions
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
def FFREE  : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;

def FPNCEST0r : FPI<0xD9, MRM3r, (outs RST:$op), (ins),
                    "fstpnce\t{%st(0), $op|$op, st(0)}">;

def FENI8087_NOP  : I<0xDB, MRM_E0, (outs), (ins), "feni8087_nop", []>;

def FDISI8087_NOP : I<0xDB, MRM_E1, (outs), (ins), "fdisi8087_nop", []>;

// Clear exceptions
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
} // Defs = [FPSW]
} // SchedRW

// Operand-less floating-point instructions for the disassembler.
def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", []>, Sched<[WriteNop]>;

let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT    : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FXAM    : I<0xD9, MRM_E5, (outs), (ins), "fxam", []>;
def F2XM1   : I<0xD9, MRM_F0, (outs), (ins), "f2xm1", []>;
def FYL2X   : I<0xD9, MRM_F1, (outs), (ins), "fyl2x", []>;
def FPTAN   : I<0xD9, MRM_F2, (outs), (ins), "fptan", []>;
def FPATAN  : I<0xD9, MRM_F3, (outs), (ins), "fpatan", []>;
def FXTRACT : I<0xD9, MRM_F4, (outs), (ins), "fxtract", []>;
def FPREM1  : I<0xD9, MRM_F5, (outs), (ins), "fprem1", []>;
def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
def FPREM   : I<0xD9, MRM_F8, (outs), (ins), "fprem", []>;
def FYL2XP1 : I<0xD9, MRM_F9, (outs), (ins), "fyl2xp1", []>;
def FSINCOS : I<0xD9, MRM_FB, (outs), (ins), "fsincos", []>;
def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", []>;
def FSCALE  : I<0xD9, MRM_FD, (outs), (ins), "fscale", []>;
def FCOMPP  : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
} // Defs = [FPSW]

def FXSAVE    : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
                  "fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
                  Requires<[HasFXSR]>;
def FXSAVE64  : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
                   "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
                   TB, Requires<[HasFXSR, In64BitMode]>;
def FXRSTOR   : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
                  "fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
                  TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
                   "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
                   TB, Requires<[HasFXSR, In64BitMode]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// Required for RET of f32 / f64 / f80 values.
def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;

// Required for CALLs which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
                                               RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
                                               RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
                                               RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
                                               RFP80:$src)>;

// Floating point constants -0.0 and -1.0
def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;

// Used to convert i64 to f64 since there isn't an SSE version.
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;

// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
          Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
          Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
          Requires<[FPStackf64]>;

// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
          Requires<[FPStackf32]>;
def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
          Requires<[FPStackf32]>;
def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
          Requires<[FPStackf64]>;
993
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrFormats.td
vendored
Normal file
@@ -0,0 +1,993 @@
//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//

// Format specifies the encoding used by the instruction. This is part of the
// ad-hoc solution used to emit machine instruction encodings by our machine
// code emitter.
class Format<bits<7> val> {
  bits<7> Value = val;
}

def Pseudo        : Format<0>;
def RawFrm        : Format<1>;
def AddRegFrm     : Format<2>;
def RawFrmMemOffs : Format<3>;
def RawFrmSrc     : Format<4>;
def RawFrmDst     : Format<5>;
def RawFrmDstSrc  : Format<6>;
def RawFrmImm8    : Format<7>;
def RawFrmImm16   : Format<8>;
def MRMDestMem     : Format<32>;
def MRMSrcMem      : Format<33>;
def MRMSrcMem4VOp3 : Format<34>;
def MRMSrcMemOp4   : Format<35>;
def MRMXm : Format<39>;
def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
def MRM6m : Format<46>; def MRM7m : Format<47>;
def MRMDestReg     : Format<48>;
def MRMSrcReg      : Format<49>;
def MRMSrcReg4VOp3 : Format<50>;
def MRMSrcRegOp4   : Format<51>;
def MRMXr : Format<55>;
def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
def MRM6r : Format<62>; def MRM7r : Format<63>;
def MRM_C0 : Format<64>;  def MRM_C1 : Format<65>;  def MRM_C2 : Format<66>;
def MRM_C3 : Format<67>;  def MRM_C4 : Format<68>;  def MRM_C5 : Format<69>;
def MRM_C6 : Format<70>;  def MRM_C7 : Format<71>;  def MRM_C8 : Format<72>;
def MRM_C9 : Format<73>;  def MRM_CA : Format<74>;  def MRM_CB : Format<75>;
def MRM_CC : Format<76>;  def MRM_CD : Format<77>;  def MRM_CE : Format<78>;
def MRM_CF : Format<79>;  def MRM_D0 : Format<80>;  def MRM_D1 : Format<81>;
def MRM_D2 : Format<82>;  def MRM_D3 : Format<83>;  def MRM_D4 : Format<84>;
def MRM_D5 : Format<85>;  def MRM_D6 : Format<86>;  def MRM_D7 : Format<87>;
def MRM_D8 : Format<88>;  def MRM_D9 : Format<89>;  def MRM_DA : Format<90>;
def MRM_DB : Format<91>;  def MRM_DC : Format<92>;  def MRM_DD : Format<93>;
def MRM_DE : Format<94>;  def MRM_DF : Format<95>;  def MRM_E0 : Format<96>;
def MRM_E1 : Format<97>;  def MRM_E2 : Format<98>;  def MRM_E3 : Format<99>;
def MRM_E4 : Format<100>; def MRM_E5 : Format<101>; def MRM_E6 : Format<102>;
def MRM_E7 : Format<103>; def MRM_E8 : Format<104>; def MRM_E9 : Format<105>;
def MRM_EA : Format<106>; def MRM_EB : Format<107>; def MRM_EC : Format<108>;
def MRM_ED : Format<109>; def MRM_EE : Format<110>; def MRM_EF : Format<111>;
def MRM_F0 : Format<112>; def MRM_F1 : Format<113>; def MRM_F2 : Format<114>;
def MRM_F3 : Format<115>; def MRM_F4 : Format<116>; def MRM_F5 : Format<117>;
def MRM_F6 : Format<118>; def MRM_F7 : Format<119>; def MRM_F8 : Format<120>;
def MRM_F9 : Format<121>; def MRM_FA : Format<122>; def MRM_FB : Format<123>;
def MRM_FC : Format<124>; def MRM_FD : Format<125>; def MRM_FE : Format<126>;
def MRM_FF : Format<127>;
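
// Illustrative use (assumption: these defs live in other X86 .td files and
// may be spelled differently there): an instruction with a fixed ModRM byte
// names its Format directly, e.g. something like
//   def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB;
// would encode as 0F 01 C8, with MRM_C8 supplying the literal ModRM byte
// and TB selecting the 0F opcode map.
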
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by
// our machine code emitter.
class ImmType<bits<4> val> {
  bits<4> Value = val;
}
def NoImm      : ImmType<0>;
def Imm8       : ImmType<1>;
def Imm8PCRel  : ImmType<2>;
def Imm8Reg    : ImmType<3>; // Register encoded in [7:4].
def Imm16      : ImmType<4>;
def Imm16PCRel : ImmType<5>;
def Imm32      : ImmType<6>;
def Imm32PCRel : ImmType<7>;
def Imm32S     : ImmType<8>;
def Imm64      : ImmType<9>;

// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
class FPFormat<bits<3> val> {
  bits<3> Value = val;
}
def NotFP      : FPFormat<0>;
def ZeroArgFP  : FPFormat<1>;
def OneArgFP   : FPFormat<2>;
def OneArgFPRW : FPFormat<3>;
def TwoArgFP   : FPFormat<4>;
def CompareFP  : FPFormat<5>;
def CondMovFP  : FPFormat<6>;
def SpecialFP  : FPFormat<7>;
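
// Cross-reference (illustrative): the FP-stack pseudos earlier in this diff
// select one of these values through the FpI_ template's FPForm field; e.g.
// the ISTT_Fp* truncating stores use OneArgFP and the LD_Fp0*/LD_Fp1*
// constant loads use ZeroArgFP, which is what the stackifier pass reads.
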
// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
// Keep in sync with tables in X86InstrInfo.cpp.
class Domain<bits<2> val> {
  bits<2> Value = val;
}
def GenericDomain   : Domain<0>;
def SSEPackedSingle : Domain<1>;
def SSEPackedDouble : Domain<2>;
def SSEPackedInt    : Domain<3>;

// Class specifying the vector form of the decompressed
// displacement of 8-bit.
class CD8VForm<bits<3> val> {
  bits<3> Value = val;
}
def CD8VF : CD8VForm<0>;  // v := VL
def CD8VH : CD8VForm<1>;  // v := VL/2
def CD8VQ : CD8VForm<2>;  // v := VL/4
def CD8VO : CD8VForm<3>;  // v := VL/8
// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
def CD8VT8 : CD8VForm<7>; // v := 8
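
// Worked example (illustrative): for a 512-bit vector VL = 64 bytes, so the
// disp8 granularity v is CD8VF -> 64, CD8VH -> 32, CD8VQ -> 16, CD8VO -> 8;
// the tuple forms count elements instead, e.g. CD8VT2 with 4-byte elements
// gives v = 2 elements = 8 bytes.
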
// Class specifying the prefix used as an opcode extension.
class Prefix<bits<3> val> {
  bits<3> Value = val;
}
def NoPrfx : Prefix<0>;
def PD     : Prefix<1>;
def XS     : Prefix<2>;
def XD     : Prefix<3>;
def PS     : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
                        // that other instructions with this opcode use PD/XS/XD
                        // and if any of those is not supported they shouldn't
                        // decode to this instruction. e.g. ANDSS/ANDSD don't
                        // exist, but the 0xf2/0xf3 encodings shouldn't decode
                        // to ANDPS.

// Class specifying the opcode map.
class Map<bits<3> val> {
  bits<3> Value = val;
}
def OB        : Map<0>;
def TB        : Map<1>;
def T8        : Map<2>;
def TA        : Map<3>;
def XOP8      : Map<4>;
def XOP9      : Map<5>;
def XOPA      : Map<6>;
def ThreeDNow : Map<7>;

// Class specifying the encoding.
class Encoding<bits<2> val> {
  bits<2> Value = val;
}
def EncNormal : Encoding<0>;
def EncVEX    : Encoding<1>;
def EncXOP    : Encoding<2>;
def EncEVEX   : Encoding<3>;

// Operand size for encodings that change based on mode.
class OperandSize<bits<2> val> {
  bits<2> Value = val;
}
def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
def OpSize16    : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
def OpSize32    : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
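
// Example (illustrative): a 16-bit form such as "mov $1234, %ax" is tagged
// OpSize16, so the encoder emits the 0x66 operand-size override in 32-bit
// (and 64-bit) mode but omits it in 16-bit mode; a 32-bit form is tagged
// OpSize32 and gets 0x66 only in 16-bit mode.
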
// Address size for encodings that change based on mode.
class AddressSize<bits<2> val> {
  bits<2> Value = val;
}
def AdSizeX  : AddressSize<0>; // Address size determined using addr operand.
def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.

// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize16 { OperandSize OpSize = OpSize16; }
class OpSize32 { OperandSize OpSize = OpSize32; }
class AdSize16 { AddressSize AdSize = AdSize16; }
class AdSize32 { AddressSize AdSize = AdSize32; }
class AdSize64 { AddressSize AdSize = AdSize64; }
class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
class T8 { Map OpMap = T8; }
class TA { Map OpMap = TA; }
class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
class XD : TB { Prefix OpPrefix = XD; }
class XS : TB { Prefix OpPrefix = XS; }
class T8PS : T8 { Prefix OpPrefix = PS; }
class T8PD : T8 { Prefix OpPrefix = PD; }
class T8XD : T8 { Prefix OpPrefix = XD; }
class T8XS : T8 { Prefix OpPrefix = XS; }
class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bits<2> VEX_WPrefix = 1; }
class VEX_WIG { bits<2> VEX_WPrefix = 2; }
// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
// FIXME: We should consider adding separate bits for VEX_WIG and the extra
// part of W1X. This would probably simplify the tablegen emitters and
// the TSFlags creation below.
class VEX_W1X { bits<2> VEX_WPrefix = 3; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
class NOTRACK { bit hasNoTrackPrefix = 1; }

// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
class EVEX_CD8<int esize, CD8VForm form> {
  int CD8_EltSize = !srl(esize, 3);
  bits<3> CD8_Form = form.Value;
}
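
// Usage sketch (illustrative): an EVEX instruction declares its disp8
// compression as e.g. EVEX_CD8<32, CD8VF>, giving CD8_EltSize = 32 >> 3 = 4
// bytes and CD8_Form = CD8VF; combined with a 512-bit vector length this
// yields CD8_Scale = 64 (see the CD8_Scale computation in X86Inst below),
// so an encoded disp8 of 1 addresses offset 64.
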
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }

// Specify the alternative register form instruction to replace the current
// instruction in case it was picked during generation of memory folding tables.
class FoldGenData<string _RegisterForm> {
  string FoldGenRegForm = _RegisterForm;
}

// Provide a specific instruction to be used by the EVEX2VEX conversion.
class EVEX2VEXOverride<string VEXInstrName> {
  string EVEX2VEXOverride = VEXInstrName;
}

// Mark the instruction as "illegal to memory fold/unfold".
class NotMemoryFoldable { bit isMemoryFoldable = 0; }

// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }

class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
              string AsmStr, Domain d = GenericDomain>
  : Instruction {
  let Namespace = "X86";

  bits<8> Opcode = opcod;
  Format Form = f;
  bits<7> FormBits = Form.Value;
  ImmType ImmT = i;

  dag OutOperandList = outs;
  dag InOperandList = ins;
  string AsmString = AsmStr;

  // If this is a pseudo instruction, mark it isCodeGenOnly.
  let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");

  //
  // Attributes specific to X86 instructions...
  //
  bit ForceDisassemble = 0; // Force instruction to disassemble even though it's
                            // isCodeGenOnly. Needed to hide an ambiguous
                            // AsmString from the parser, but still disassemble.

  OperandSize OpSize = OpSizeFixed; // Does this instruction's encoding change
                                    // based on operand size of the mode?
  bits<2> OpSizeBits = OpSize.Value;
  AddressSize AdSize = AdSizeX; // Does this instruction's encoding change
                                // based on address size of the mode?
  bits<2> AdSizeBits = AdSize.Value;

  Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
  bits<3> OpPrefixBits = OpPrefix.Value;
  Map OpMap = OB;           // Which opcode map does this inst have?
  bits<3> OpMapBits = OpMap.Value;
  bit hasREX_WPrefix = 0;   // Does this inst require the REX.W prefix?
  FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
  bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
  Domain ExeDomain = d;
  bit hasREPPrefix = 0;     // Does this inst have a REP prefix?
  Encoding OpEnc = EncNormal; // Encoding used by this instruction
  bits<2> OpEncBits = OpEnc.Value;
  bits<2> VEX_WPrefix = 0;  // Does this inst set the VEX_W field?
  bit hasVEX_4V = 0;        // Does this inst require the VEX.VVVV field?
  bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
  bit ignoresVEX_L = 0;     // Does this instruction ignore the L-bit?
  bit hasEVEX_K = 0;        // Does this inst require masking?
  bit hasEVEX_Z = 0;        // Does this inst set the EVEX_Z field?
  bit hasEVEX_L2 = 0;       // Does this inst set the EVEX_L2 field?
  bit hasEVEX_B = 0;        // Does this inst set the EVEX_B field?
  bits<3> CD8_Form = 0;     // Compressed disp8 form - vector-width.
  // Declare it int rather than bits<4> so that all bits are defined when
  // assigning to bits<7>.
  int CD8_EltSize = 0;      // Compressed disp8 form - element-size in bytes.
  bit hasEVEX_RC = 0;       // Explicitly specified rounding control in FP instruction.
  bit hasNoTrackPrefix = 0; // Does this inst have a 0x3E (NoTrack) prefix?

  bits<2> EVEX_LL;
  let EVEX_LL{0} = hasVEX_L;
  let EVEX_LL{1} = hasEVEX_L2;
  // Vector size in bytes.
  bits<7> VectSize = !shl(16, EVEX_LL);

  // The scaling factor for AVX512's compressed displacement is either
  //   - the size of a power-of-two number of elements or
  //   - the size of a single element for broadcasts or
  //   - the total vector size divided by a power-of-two number.
  // Possible values are: 0 (non-AVX512 inst), 1, 2, 4, 8, 16, 32 and 64.
  bits<7> CD8_Scale = !if (!eq (OpEnc.Value, EncEVEX.Value),
                           !if (CD8_Form{2},
                                !shl(CD8_EltSize, CD8_Form{1-0}),
                                !if (hasEVEX_B,
                                     CD8_EltSize,
                                     !srl(VectSize, CD8_Form{1-0}))), 0);
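
  // Worked example (illustrative) of the CD8_Scale selection above, for a
  // 512-bit instruction (VectSize = 64):
  //   - tuple form CD8VT4 with 8-byte elements: CD8_Form{2} = 1, so
  //     scale = !shl(8, 2) = 32;
  //   - broadcast (hasEVEX_B) with 4-byte elements: scale = 4;
  //   - full-vector form CD8VF: scale = !srl(64, 0) = 64.
  // A non-EVEX encoding keeps scale 0, i.e. no disp8 compression.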

  // Used in the memory folding generation (TableGen backend) to point to an
  // alternative instruction to replace the current one in case it got picked
  // during generation.
  string FoldGenRegForm = ?;

  // Used to prevent an explicit EVEX2VEX override for this instruction.
  string EVEX2VEXOverride = ?;

  bit isMemoryFoldable = 1;       // Is it allowed to memory fold/unfold this instruction?
  bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.

  // TSFlags layout should be kept in sync with X86BaseInfo.h.
  let TSFlags{6-0}   = FormBits;
  let TSFlags{8-7}   = OpSizeBits;
  let TSFlags{10-9}  = AdSizeBits;
  // No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
  let TSFlags{12-11} = OpPrefixBits{1-0};
  let TSFlags{15-13} = OpMapBits;
  let TSFlags{16}    = hasREX_WPrefix;
  let TSFlags{20-17} = ImmT.Value;
  let TSFlags{23-21} = FPForm.Value;
  let TSFlags{24}    = hasLockPrefix;
  let TSFlags{25}    = hasREPPrefix;
  let TSFlags{27-26} = ExeDomain.Value;
  let TSFlags{29-28} = OpEncBits;
  let TSFlags{37-30} = Opcode;
  // Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
  let TSFlags{38}    = VEX_WPrefix{0};
  let TSFlags{39}    = hasVEX_4V;
  let TSFlags{40}    = hasVEX_L;
  let TSFlags{41}    = hasEVEX_K;
  let TSFlags{42}    = hasEVEX_Z;
  let TSFlags{43}    = hasEVEX_L2;
  let TSFlags{44}    = hasEVEX_B;
  // If we run out of TSFlags bits, it's possible to encode this in 3 bits.
  let TSFlags{51-45} = CD8_Scale;
  let TSFlags{52}    = hasEVEX_RC;
  let TSFlags{53}    = hasNoTrackPrefix;
}

class PseudoI<dag oops, dag iops, list<dag> pattern>
  : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
  let Pattern = pattern;
}

class I<bits<8> o, Format f, dag outs, dag ins, string asm,
        list<dag> pattern, Domain d = GenericDomain>
  : X86Inst<o, f, NoImm, outs, ins, asm, d> {
  let Pattern = pattern;
  let CodeSize = 3;
}
class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
          list<dag> pattern, Domain d = GenericDomain>
  : X86Inst<o, f, Imm8, outs, ins, asm, d> {
  let Pattern = pattern;
  let CodeSize = 3;
}
class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
             list<dag> pattern, Domain d = GenericDomain>
  : X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
  let Pattern = pattern;
  let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
               list<dag> pattern>
  : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
           list<dag> pattern>
  : X86Inst<o, f, Imm16, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
           list<dag> pattern>
  : X86Inst<o, f, Imm32, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}
class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
            list<dag> pattern>
  : X86Inst<o, f, Imm32S, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}

class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
           list<dag> pattern>
  : X86Inst<o, f, Imm64, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}

class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
                list<dag> pattern>
  : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}

class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
                list<dag> pattern>
  : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}

// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
  : I<o, F, outs, ins, asm, []> {}

// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
  : PseudoI<outs, ins, pattern> {
  let FPForm = fp;
}

// Templates for instructions that use a 16- or 32-bit segmented address as
// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
//
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset

class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
              list<dag> pattern>
  : X86Inst<o, f, Imm16, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}

class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
              list<dag> pattern>
  : X86Inst<o, f, Imm32, outs, ins, asm> {
  let Pattern = pattern;
  let CodeSize = 3;
}

// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
         list<dag> pattern, Domain d = GenericDomain>
  : I<o, F, outs, ins, asm, pattern, d> {
  let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
                   !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
                   !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
                   !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
                   !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
                       [UseSSE1])))));

  // AVX instructions have a 'v' prefix in the mnemonic
  let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
                  !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
                      asm));
}
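
// Example (illustrative) of how SI resolves its fields: a scalar def using
// the XS prefix with the default encoding gets Predicates = [UseSSE1] and
// keeps its mnemonic, while the same def tagged VEX (or EVEX) flips to
// [UseAVX] (or [HasAVX512]) and grows the "v" prefix, e.g. "addss" vs.
// "vaddss", without duplicating the template.
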
// SI - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
|
||||
class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, Domain d = GenericDomain>
|
||||
: I<o, F, outs, ins, asm, pattern, d> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
|
||||
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
|
||||
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
|
||||
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
|
||||
[UseSSE1])))));
|
||||
|
||||
// AVX instructions have a 'v' prefix in the mnemonic
|
||||
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
|
||||
asm));
|
||||
}
|
||||
// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
|
||||
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
|
||||
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
|
||||
[UseSSE2])));
|
||||
|
||||
// AVX instructions have a 'v' prefix in the mnemonic
|
||||
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
|
||||
asm));
|
||||
}
|
||||
|
||||
// PI - SSE 1 & 2 packed instructions
|
||||
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
Domain d>
|
||||
: I<o, F, outs, ins, asm, pattern, d> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
|
||||
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
|
||||
[UseSSE1])));
|
||||
|
||||
// AVX instructions have a 'v' prefix in the mnemonic
|
||||
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
|
||||
asm));
|
||||
}
|
||||
|
||||
// MMXPI - SSE 1 & 2 packed instructions with MMX operands
|
||||
class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
|
||||
Domain d>
|
||||
: I<o, F, outs, ins, asm, pattern, d> {
|
||||
let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
|
||||
[HasMMX, HasSSE1]);
|
||||
}
|
||||
|
||||
// PIi8 - SSE 1 & 2 packed instructions with immediate
|
||||
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern, Domain d>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, d> {
|
||||
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
|
||||
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
|
||||
[UseSSE1])));
|
||||
|
||||
// AVX instructions have a 'v' prefix in the mnemonic
|
||||
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
|
||||
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
|
||||
asm));
|
||||
}
|
||||
|
||||
// SSE1 Instruction Templates:
|
||||
//
|
||||
// SSI - SSE1 instructions with XS prefix.
|
||||
// PSI - SSE1 instructions with PS prefix.
|
||||
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
|
||||
// VSSI - SSE1 instructions with XS prefix in AVX form.
|
||||
// VPSI - SSE1 instructions with PS prefix in AVX form, packed single.
|
||||
|
||||
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
|
||||
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
|
||||
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
|
||||
Requires<[UseSSE1]>;
|
||||
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
|
||||
Requires<[UseSSE1]>;
|
||||
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
|
||||
Requires<[HasAVX]>;
|
||||
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
|
||||
Requires<[HasAVX]>;
|
||||
|
||||
// SSE2 Instruction Templates:
|
||||
//
|
||||
// SDI - SSE2 instructions with XD prefix.
|
||||
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
|
||||
// S2SI - SSE2 instructions with XS prefix.
|
||||
// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
|
||||
// PDI - SSE2 instructions with PD prefix, packed double domain.
|
||||
// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
|
||||
// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
|
||||
// VPDI - SSE2 vector instructions with PD prefix in AVX form,
|
||||
// packed double domain.
|
||||
// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
|
||||
// S2I - SSE2 scalar instructions with PD prefix.
|
||||
// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
|
||||
// MMX operands.
|
||||
// MMXSSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
|
||||
// MMX operands.
|
||||
|
||||
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
|
||||
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
|
||||
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
|
||||
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
|
||||
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
|
||||
Requires<[UseSSE2]>;
|
||||
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
|
||||
Requires<[UseSSE2]>;
|
||||
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
|
||||
Requires<[UseAVX]>;
|
||||
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
|
||||
Requires<[HasAVX]>;
|
||||
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
|
||||
PD, Requires<[HasAVX]>;
|
||||
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
|
||||
Requires<[UseAVX]>;
|
||||
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
|
||||
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
|
||||
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
|
||||
|
||||
// SSE3 Instruction Templates:
//
// S3I  - SSE3 instructions with PD prefixes.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.

class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
    Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
    Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
          list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
    Requires<[UseSSE3]>;


// SSSE3 Instruction Templates:
//
// SS38I    - SSSE3 instructions with T8 prefix.
// SS3AI    - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
// uses the MMX registers. The 64-bit versions are grouped with the MMX
// classes. They need to be enabled even if AVX is enabled.

class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
    Requires<[HasMMX, HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
    Requires<[HasMMX, HasSSSE3]>;

// SSE4.1 Instruction Templates:
//
// SS48I    - SSE 4.1 instructions with T8 prefix.
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[UseSSE41]>;

// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[UseSSE42]>;

// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;

// SS42AI - SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[UseSSE42]>;

// AVX Instruction Templates:
//   Instructions introduced in AVX (no SSE equivalent forms)
//
// AVX8I   - AVX instructions with T8PD prefix.
// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[HasAVX]>;

// AVX2 Instruction Templates:
//   Instructions introduced in AVX2 (no SSE equivalent forms)
//
// AVX28I   - AVX2 instructions with T8PD prefix.
// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[HasAVX2]>;


// AVX-512 Instruction Templates:
//   Instructions introduced in AVX-512 (no SSE equivalent forms)
//
// AVX5128I   - AVX-512 instructions with T8PD prefix.
// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
// AVX512PDI  - AVX-512 instructions with PD, double packed.
// AVX512PSI  - AVX-512 instructions with PS, single packed.
// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
// AVX512XSI  - AVX-512 instructions with XS prefix, generic domain.
// AVX512BI   - AVX-512 instructions with PD, int packed domain.
// AVX512SI   - AVX-512 scalar instructions with PD prefix.

class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[HasAVX512]>;
class AVX5128IBase : T8PD {
  Domain ExeDomain = SSEPackedInt;
}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
                 list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
    Requires<[HasAVX512]>;
class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, XS,
    Requires<[HasAVX512]>;
class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
    Requires<[HasAVX512]>;
class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
    Requires<[HasAVX512]>;
class AVX512BIBase : PD {
  Domain ExeDomain = SSEPackedInt;
}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                 list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
    Requires<[HasAVX512]>;
class AVX512BIi8Base : PD {
  Domain ExeDomain = SSEPackedInt;
  ImmType ImmT = Imm8;
}
class AVX512XSIi8Base : XS {
  Domain ExeDomain = SSEPackedInt;
  ImmType ImmT = Imm8;
}
class AVX512XDIi8Base : XD {
  Domain ExeDomain = SSEPackedInt;
  ImmType ImmT = Imm8;
}
class AVX512PSIi8Base : PS {
  Domain ExeDomain = SSEPackedSingle;
  ImmType ImmT = Imm8;
}
class AVX512PDIi8Base : PD {
  Domain ExeDomain = SSEPackedDouble;
  ImmType ImmT = Imm8;
}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                 list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
  ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
    Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
    Requires<[HasAVX512]>;
class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
    Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                 list<dag> pattern, Domain d>
  : Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern, Domain d>
  : I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
                  list<dag>pattern>
  : I<o, F, outs, ins, asm, pattern>, T8PD,
    EVEX_4V, Requires<[HasAVX512]>;
class AVX512FMA3Base : T8PD, EVEX_4V;

class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag>pattern>
  : I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;

// AES Instruction Templates:
//
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
    Requires<[NoAVX, HasAES]>;

class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    Requires<[NoAVX, HasAES]>;

// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag>pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;

// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag>pattern>
  : I<o, F, outs, ins, asm, pattern>, T8PD,
    VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern>
  : I<o, F, outs, ins, asm, pattern>, T8PD,
    VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag>pattern>
  : I<o, F, outs, ins, asm, pattern>, T8PD,
    VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;

// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag>pattern>
  : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
    VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern>
  : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
    VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag>pattern>
  : Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
    VEX_4V, FMASC, Requires<[HasFMA4]>;

// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
    XOP9, Requires<[HasXOP]>;

// XOP 2 and 3 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
    XOP8, Requires<[HasXOP]>;
// XOP 4 Operand Instruction Templates with imm byte
class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
                list<dag> pattern>
  : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
    XOP8, Requires<[HasXOP]>;

// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern>
  : Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
    VEX_4V, Requires<[HasXOP]>;

// X86-64 Instruction templates...
//

class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
         list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : Ii16<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
              list<dag> pattern>
  : Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii64<o, F, outs, ins, asm, pattern>, REX_W;

class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
  : S2I<o, F, outs, ins, asm, pattern>, REX_W;
class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : VS2I<o, F, outs, ins, asm, pattern>, VEX_W;

// MMX Instruction templates
//

// MMXI   - MMX instructions with TB prefix.
// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode.
// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
// MMX2I  - MMX / SSE2 instructions with PD prefix.
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID  - MMX instructions with XD prefix.
// MMXIS  - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
           list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, PS, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag> pattern>
  : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
1075
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrFragmentsSIMD.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3580
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrInfo.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
3572
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrInfo_reduce.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
612
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrMMX.td
vendored
Normal file
@@ -0,0 +1,612 @@
//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
// All instructions that use MMX should be in this file, even if they also use
// SSE.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// MMX Multiclasses
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor mmx.
// This is expanded by ExpandPostRAPseudos to a pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
}
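
// Post-RA note (illustrative): ExpandPostRAPseudos rewrites MMX_SET0 into a
// self-XOR of whichever VR64 register was allocated, e.g.
//   pxor %mm0, %mm0
// which is why it can be rematerialized and scheduled as WriteZero.
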
let Constraints = "$src1 = $dst" in {
|
||||
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
|
||||
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
|
||||
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
|
||||
X86FoldableSchedWrite sched, bit Commutable = 0,
|
||||
X86MemOperand OType = i64mem> {
|
||||
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR64:$src1, VR64:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
|
||||
Sched<[sched]> {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
|
||||
(ins VR64:$src1, OType:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1,
|
||||
(bitconvert (load_mmx addr:$src2))))]>,
|
||||
Sched<[sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
|
||||
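
// Illustrative instantiation (assumption: the actual defm lines appear later
// in X86InstrMMX.td and may differ in spelling):
//   defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
//                                      SchedWriteVecALU.MMX, 1>;
// expands to MMX_PADDBirr (reg,reg) and MMX_PADDBirm (reg,mem), with the
// trailing 1 marking the register form commutable.
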
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
                              string OpcodeStr, Intrinsic IntId,
                              Intrinsic IntId2, X86FoldableSchedWrite sched,
                              X86FoldableSchedWrite schedImm> {
  def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
                (ins VR64:$src1, VR64:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
           Sched<[sched]>;
  def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
                (ins VR64:$src1, i64mem:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                [(set VR64:$dst, (IntId VR64:$src1,
                                  (bitconvert (load_mmx addr:$src2))))]>,
           Sched<[sched.Folded, ReadAfterLd]>;
  def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
                  (ins VR64:$src1, i32u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
           Sched<[schedImm]>;
}
}

/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId64, X86FoldableSchedWrite sched> {
  def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR64:$dst, (IntId64 VR64:$src))]>,
           Sched<[sched]>;

  def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR64:$dst,
                      (IntId64 (bitconvert (load_mmx addr:$src))))]>,
           Sched<[sched.Folded]>;
}

/// Binary MMX instructions requiring SSSE3.
|
||||
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
|
||||
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId64, X86FoldableSchedWrite sched,
|
||||
bit Commutable = 0> {
|
||||
let isCommutable = Commutable in
|
||||
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR64:$src1, VR64:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>,
|
||||
Sched<[sched]>;
|
||||
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
|
||||
(ins VR64:$src1, i64mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst,
|
||||
(IntId64 VR64:$src1,
|
||||
(bitconvert (load_mmx addr:$src2))))]>,
|
||||
Sched<[sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
}
|
||||
|
||||
/// PALIGN MMX instructions (require SSSE3).
|
||||
multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
|
||||
X86FoldableSchedWrite sched> {
|
||||
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
|
||||
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
|
||||
Sched<[sched]>;
|
||||
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
|
||||
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
|
||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1,
|
||||
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
|
||||
Sched<[sched.Folded, ReadAfterLd]>;
|
||||
}
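
// Illustrative note (assumed semantics, not in the source): palignr
// concatenates destination and source into a 16-byte value and extracts an
// 8-byte window shifted right by the immediate, e.g.
//   palignr $3, %mm1, %mm0
// writes bytes 3..10 of the combined value mm0:mm1 (mm0 high) into mm0.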

multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
                          string asm, X86FoldableSchedWrite sched, Domain d> {
  def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
                  [(set DstRC:$dst, (Int SrcRC:$src))], d>,
            Sched<[sched]>;
  def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
                  [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
            Sched<[sched.Folded]>;
}

multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
                                RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                                PatFrag ld_frag, string asm, Domain d> {
  def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
                  (ins DstRC:$src1, SrcRC:$src2), asm,
                  [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
            Sched<[WriteCvtI2PS]>;
  def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
                  (ins DstRC:$src1, x86memop:$src2), asm,
                  [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
            Sched<[WriteCvtI2PS.Folded]>;
}

//===----------------------------------------------------------------------===//
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//

let SchedRW = [WriteEMMS] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;

//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
//===----------------------------------------------------------------------===//

// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR64:$dst,
                          (x86mmx (scalar_to_vector GR32:$src)))]>,
                   Sched<[WriteVecMoveFromGpr]>;
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR64:$dst,
                          (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>,
                   Sched<[WriteVecLoad]>;

let Predicates = [HasMMX] in {
  def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
            (MMX_MOVD64rr GR32:$src)>;
  def : Pat<(x86mmx (MMX_X86movw2d (i32 0))),
            (MMX_SET0)>;
  def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
            (MMX_MOVD64rm addr:$src)>;
}

let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
                        "movd\t{$src, $dst|$dst, $src}", []>,
                   Sched<[WriteVecStore]>;

def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst,
                           (MMX_X86movd2w (x86mmx VR64:$src)))]>,
                    Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;

let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
                             "movq\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst, (bitconvert GR64:$src))]>,
                       Sched<[WriteVecMoveFromGpr]>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
                             (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}",
                             []>, Sched<[SchedWriteVecMoveLS.MMX.RM]>;

let isBitcast = 1 in {
  def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
                                 (outs GR64:$dst), (ins VR64:$src),
                                 "movq\t{$src, $dst|$dst, $src}",
                                 [(set GR64:$dst, (bitconvert VR64:$src))]>,
                           Sched<[WriteVecMoveToGpr]>;
  let SchedRW = [WriteVecMove], hasSideEffects = 0, isMoveReg = 1 in {
    def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                            "movq\t{$src, $dst|$dst, $src}", []>;
    let isCodeGenOnly = 1, ForceDisassemble = 1 in
    def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
                                "movq\t{$src, $dst|$dst, $src}", []>,
                           FoldGenData<"MMX_MOVQ64rr">;
  } // SchedRW, hasSideEffects, isMoveReg
} // isBitcast

def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
                               (outs), (ins i64mem:$dst, VR64:$src),
                               "movq\t{$src, $dst|$dst, $src}", []>,
                         Sched<[SchedWriteVecMoveLS.MMX.MR]>;

let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(set VR64:$dst, (load_mmx addr:$src))]>;
} // SchedRW

let SchedRW = [SchedWriteVecMoveLS.MMX.MR] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                        "movq\t{$src, $dst|$dst, $src}",
                        [(store (x86mmx VR64:$src), addr:$dst)]>;

let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
                             (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst,
                               (x86mmx (bitconvert
                                        (i64 (extractelt (v2i64 VR128:$src),
                                              (iPTR 0))))))]>;

def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
                              (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
                              [(set VR128:$dst,
                                (v2i64
                                 (scalar_to_vector
                                  (i64 (bitconvert (x86mmx VR64:$src))))))]>;

let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
                               (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
                               []>;

def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
                              (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
                              []>;
}
} // SchedRW

let Predicates = [HasMMX, HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                        "movntq\t{$src, $dst|$dst, $src}",
                        [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
                   Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;

let Predicates = [HasMMX] in {
  // movd to MMX register zero-extends
  def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
            (MMX_MOVD64rr GR32:$src)>;
  def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
            (MMX_MOVD64rm addr:$src)>;
}
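
// Illustrative note (assumed lowering, not in the source): the two patterns
// above rely on the hardware behavior that "movd %eax, %mm0" already clears
// bits 63:32 of the destination, so a zero-extending insert needs no extra
// masking and plain MMX_MOVD64rr/MMX_MOVD64rm suffice.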

// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
                                     SchedWriteVecALU.MMX>;
defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
                                     SchedWriteVecALU.MMX>;
defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
                                     SchedWriteVecALU.MMX>;
// -- Addition
defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
                                   SchedWriteVecALU.MMX, 1>;
defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
                                   SchedWriteVecALU.MMX, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
                                   SchedWriteVecALU.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
                                   SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
                                    SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
                                    SchedWriteVecALU.MMX, 1>;

defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
                                     SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
                                     SchedWriteVecALU.MMX, 1>;

defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
                                       SchedWritePHAdd.MMX>;
defm MMX_PHADDD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
                                       SchedWritePHAdd.MMX>;
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
                                        SchedWritePHAdd.MMX>;

// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
                                   SchedWriteVecALU.MMX>;
defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
                                   SchedWriteVecALU.MMX>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
                                   SchedWriteVecALU.MMX>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
                                   SchedWriteVecALU.MMX>;

defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
                                    SchedWriteVecALU.MMX>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
                                    SchedWriteVecALU.MMX>;

defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
                                     SchedWriteVecALU.MMX>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
                                     SchedWriteVecALU.MMX>;

defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
                                       SchedWritePHAdd.MMX>;
defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
                                       SchedWritePHAdd.MMX>;
defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
                                        SchedWritePHAdd.MMX>;

// -- Multiplication
defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
                                    SchedWriteVecIMul.MMX, 1>;

defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
                                    SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
                                     SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
                                     SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
                                         int_x86_ssse3_pmul_hr_sw,
                                         SchedWriteVecIMul.MMX, 1>;

// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
                                     SchedWriteVecIMul.MMX, 1>;

defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
                                          int_x86_ssse3_pmadd_ub_sw,
                                          SchedWriteVecIMul.MMX>;
let Predicates = [HasMMX, HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
                                   SchedWriteVecALU.MMX, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
                                   SchedWriteVecALU.MMX, 1>;

defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
                                    SchedWriteVecALU.MMX, 1>;
defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
                                    SchedWriteVecALU.MMX, 1>;

defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
                                    SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
                                    SchedWriteVecALU.MMX, 1>;

defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
                                    SchedWritePSADBW.MMX, 1>;
}

defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
                                       SchedWriteVecALU.MMX>;
defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
                                       SchedWriteVecALU.MMX>;
defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
                                       SchedWriteVecALU.MMX>;
let Constraints = "$src1 = $dst" in
defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b,
                                   SchedWriteShuffle.MMX>;

// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
                                  SchedWriteVecLogic.MMX, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
                                 SchedWriteVecLogic.MMX, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
                                  SchedWriteVecLogic.MMX, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
                                   SchedWriteVecLogic.MMX>;

// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
                                    int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
                                    int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
                                    int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;

defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                                    int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
                                    int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
                                    int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;

defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
                                    int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                                    int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
                                    SchedWriteVecShift.MMX,
                                    SchedWriteVecShiftImm.MMX>;

// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
                                     SchedWriteVecALU.MMX>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
                                     SchedWriteVecALU.MMX>;
defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
                                     SchedWriteVecALU.MMX>;

defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
                                     SchedWriteVecALU.MMX>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
                                     SchedWriteVecALU.MMX>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
                                     SchedWriteVecALU.MMX>;

// -- Unpack Instructions
defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
                                       int_x86_mmx_punpckhbw,
                                       SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
                                       int_x86_mmx_punpckhwd,
                                       SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
                                       int_x86_mmx_punpckhdq,
                                       SchedWriteShuffle.MMX>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
                                       int_x86_mmx_punpcklbw,
                                       SchedWriteShuffle.MMX,
                                       0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
                                       int_x86_mmx_punpcklwd,
                                       SchedWriteShuffle.MMX,
                                       0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
                                       int_x86_mmx_punpckldq,
                                       SchedWriteShuffle.MMX,
                                       0, i32mem>;

// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
                                      SchedWriteShuffle.MMX>;
defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
                                      SchedWriteShuffle.MMX>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
                                      SchedWriteShuffle.MMX>;

// -- Shuffle Instructions
defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
                                       SchedWriteVarShuffle.MMX>;

def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
                          (outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
                          "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set VR64:$dst,
                            (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
                   Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
                          (outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
                          "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set VR64:$dst,
                            (int_x86_sse_pshuf_w (load_mmx addr:$src1),
                                                 imm:$src2))]>,
                   Sched<[SchedWriteShuffle.MMX.Folded]>;
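
// Illustrative note (assumed semantics, not in the source): the pshufw
// immediate packs four 2-bit source-word indices, one per destination word.
// For example
//   pshufw $0x1B, %mm0, %mm1   ; 0x1B = 0b00011011
// selects source words 3,2,1,0 for destination words 0..3, i.e. it reverses
// the four 16-bit lanes of %mm0 into %mm1.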

// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
                                   f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
                                   WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
                                   f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
                                   WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
                                    f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
                                    WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
                                    f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
                                    WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
                                   i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
                                   WriteCvtI2PD, SSEPackedDouble>, PD;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
                                         int_x86_sse_cvtpi2ps,
                                         i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
                                         SSEPackedSingle>, PS;
}
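
// Illustrative note (assumed behavior, not in the source): cvtpi2ps only
// rewrites the low two float lanes of its XMM destination, which is why it is
// modeled as a three-address op tied to $src1 ("$src1 = $dst") while the
// other conversions above fully define their destination.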

// Extract / Insert
let Predicates = [HasMMX, HasSSE1] in
def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
                         (outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
                         "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
                                                 imm:$src2))]>,
                  Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasMMX, HasSSE1] in {
  def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
                            (outs VR64:$dst),
                            (ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
                            "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                            [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
                                              GR32orGR64:$src2, imm:$src3))]>,
                     Sched<[WriteVecInsert]>;

  def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
                            (outs VR64:$dst),
                            (ins VR64:$src1, i16mem:$src2, i32u8imm:$src3),
                            "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                            [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
                                              (i32 (anyext (loadi16 addr:$src2))),
                                              imm:$src3))]>,
                     Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}

// Mask creation
let Predicates = [HasMMX, HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
                          (ins VR64:$src),
                          "pmovmskb\t{$src, $dst|$dst, $src}",
                          [(set GR32orGR64:$dst,
                            (int_x86_mmx_pmovmskb VR64:$src))]>,
                     Sched<[WriteMMXMOVMSK]>;

// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
                            [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;

def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
          (x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;

def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
          (x86mmx (MMX_MOVQ64rm addr:$src))>;

// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                          "maskmovq\t{$mask, $src|$src, $mask}",
                          [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
let Uses = [RDI], Predicates = [HasMMX, HasSSE1,In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                           "maskmovq\t{$mask, $src|$src, $mask}",
                           [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
}

// 64-bit bit convert.
let Predicates = [HasMMX, HasSSE2] in {
  def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
            (MMX_MOVQ2FR64rr VR64:$src)>;
  def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
            (MMX_MOVFR642Qrr FR64:$src)>;
  def : Pat<(x86mmx (MMX_X86movdq2q
                     (bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
            (MMX_CVTPS2PIirr VR128:$src)>;
  def : Pat<(x86mmx (MMX_X86movdq2q
                     (bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
            (MMX_CVTTPS2PIirr VR128:$src)>;
  def : Pat<(x86mmx (MMX_X86movdq2q
                     (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
            (MMX_CVTTPS2PIirr VR128:$src)>;
  def : Pat<(x86mmx (MMX_X86movdq2q
                     (bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
            (MMX_CVTPD2PIirr VR128:$src)>;
  def : Pat<(x86mmx (MMX_X86movdq2q
                     (bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
            (MMX_CVTTPD2PIirr VR128:$src)>;
}
80
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrMPX.td
vendored
Normal file
@@ -0,0 +1,80 @@
//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MPX instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// FIXME: Investigate a better scheduler class once MPX is used inside LLVM.
let SchedRW = [WriteSystem] in {

multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
  def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
              OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
            Requires<[HasMPX, Not64BitMode]>;
  def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
              OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
            Requires<[HasMPX, In64BitMode]>;
}

defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
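
// Illustrative note (assumed expansion, not in the source): the defm above
// concatenates the multiclass def names onto "BNDMK", producing BNDMK32rm and
// BNDMK64rm -- the same encoding, selected by Not64BitMode/In64BitMode.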

multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
  def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
              OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
            Requires<[HasMPX, Not64BitMode]>;
  def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
              OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
            Requires<[HasMPX, In64BitMode]>;

  def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
              OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
            Requires<[HasMPX, Not64BitMode]>;
  def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
              OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
            Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;

def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
                 "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
               Requires<[HasMPX]>, NotMemoryFoldable;
let mayLoad = 1 in {
def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
                   "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
                 Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
                   "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
                 Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
}
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
                     "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
                   Requires<[HasMPX]>, NotMemoryFoldable;
let mayStore = 1 in {
def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
                   "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
                 Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
                   "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
                 Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;

def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
                "bndstx\t{$src, $dst|$dst, $src}", []>, PS,
              Requires<[HasMPX]>;
}
let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
                "bndldx\t{$src, $dst|$dst, $src}", []>, PS,
              Requires<[HasMPX]>;
} // SchedRW
30
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrSGX.td
vendored
Normal file
@@ -0,0 +1,30 @@
//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel SGX instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SGX instructions

let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
              "encls", []>, TB;

// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
              "enclu", []>, TB;

// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
              "enclv", []>, TB;
} // SchedRW
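
// Illustrative note (assumed convention, not modeled above): each encl*
// instruction dispatches on a leaf number supplied in EAX, with leaf-specific
// parameters in RBX/RCX/RDX; those implicit register uses are deliberately
// left unmodeled in these defs.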
8256
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrSSE.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
63
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrSVM.td
vendored
Normal file
@@ -0,0 +1,63 @@
//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the AMD SVM instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SVM instructions

let SchedRW = [WriteSystem] in {
// 0F 01 D9
def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;

// 0F 01 DC
def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;

// 0F 01 DD
def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;

// 0F 01 DE
let Uses = [EAX] in
def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB;

// 0F 01 D8
let Uses = [EAX] in
def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%eax|eax}", []>, TB,
              Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB,
              Requires<[In64BitMode]>;

// 0F 01 DA
let Uses = [EAX] in
def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%eax|eax}", []>, TB,
               Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB,
               Requires<[In64BitMode]>;

// 0F 01 DB
let Uses = [EAX] in
def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%eax|eax}", []>, TB,
               Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB,
               Requires<[In64BitMode]>;

// 0F 01 DF
let Uses = [EAX, ECX] in
def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
                  "invlpga\t{%eax, %ecx|eax, ecx}", []>, TB, Requires<[Not64BitMode]>;
let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
                  "invlpga\t{%rax, %ecx|rax, ecx}", []>, TB, Requires<[In64BitMode]>;
} // SchedRW
1031
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrShiftRotate.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
743
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrSystem.td
vendored
Normal file
@@ -0,0 +1,743 @@
//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
// privileged modes. These are not typically used by the compiler, but are
// supported for the assembler and disassembler.
//
//===----------------------------------------------------------------------===//

let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;

let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;

// CPU flow control instructions

let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
  def UD2 : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
  def UD1 : I<0xB9, RawFrm, (outs), (ins), "ud1", []>, TB;
  def UD0 : I<0xFF, RawFrm, (outs), (ins), "ud0", []>, TB;
}

def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;

// Interrupt and SysCall Instructions.
let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>, Requires<[Not64BitMode]>;

def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
} // SchedRW

// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;

let SchedRW = [WriteSystem] in {

def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
              [(int_x86_int imm:$trap)]>;


def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
              Requires<[In64BitMode]>;

def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;

def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
              Requires<[In64BitMode]>;
} // SchedRW

def : Pat<(debugtrap),
          (INT3)>, Requires<[NotPS4]>;
def : Pat<(debugtrap),
          (INT (i8 0x41))>, Requires<[IsPS4]>;
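
// Illustrative note (grounded in the two patterns above): a generic debugtrap
// becomes the one-byte "int3", except on PS4 targets, where the same node is
// lowered to "int $0x41" instead.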

//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|al, dx}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins), "in{w}\t{%dx, %ax|ax, dx}", []>,
             OpSize16;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", []>,
             OpSize32;

let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
                "in{b}\t{$port, %al|al, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
                 "in{w}\t{$port, %ax|ax, $port}", []>, OpSize16;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
                 "in{l}\t{$port, %eax|eax, $port}", []>, OpSize32;

let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins), "out{b}\t{%al, %dx|dx, al}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins), "out{w}\t{%ax, %dx|dx, ax}", []>,
              OpSize16;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", []>,
              OpSize32;

let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
                 "out{b}\t{%al, $port|$port, al}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
                  "out{w}\t{%ax, $port|$port, ax}", []>, OpSize16;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
                  "out{l}\t{%eax, $port|$port, eax}", []>, OpSize32;

} // SchedRW
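
// Illustrative note (assumed usage, not in the source): the rr forms take the
// port number from DX and can address all 65536 ports, while the ri/ir forms
// encode an 8-bit immediate port, e.g.
//   outb %al, $0x80    ; OUT8ir, ports 0..255 only
//   outb %al, %dx      ; OUT8rr, any port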

//===----------------------------------------------------------------------===//
// Moves to and from debug registers

let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[Not64BitMode]>;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[In64BitMode]>;

def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[Not64BitMode]>;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[In64BitMode]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// Moves to and from control registers

let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[Not64BitMode]>;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[In64BitMode]>;

def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[Not64BitMode]>;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
                "mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
              Requires<[In64BitMode]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// Segment override instruction prefixes

//let SchedRW = [WriteNop] in {
//def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
//def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
//def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
//def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
//def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
//def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
//} // SchedRW

//===----------------------------------------------------------------------===//
// Moves to and from segment registers.
//

let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
                "mov{w}\t{$src, $dst|$dst, $src}", []>;
}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
                "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
                 "mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
                "mov{w}\t{$src, $dst|$dst, $src}", []>;
}
} // SchedRW

//===----------------------------------------------------------------------===//
// Segmentation support instructions.

let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;

let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize16, NotMemoryFoldable;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                "lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize16, NotMemoryFoldable;

// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize32, NotMemoryFoldable;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                "lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize32, NotMemoryFoldable;
// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
                 "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;

// The i16mem operand in LSL32rm and the GR32 operand in LSL32rr are not typos.
let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize16, NotMemoryFoldable;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize16, NotMemoryFoldable;
// The i16mem operand in LSL64rm and the GR32 operand in LSL64rr are not typos.
let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize32, NotMemoryFoldable;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
              OpSize32, NotMemoryFoldable;
let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;

def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;

def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
               "str{w}\t$dst", []>, TB, OpSize16;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
               "str{l}\t$dst", []>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
                "str{q}\t$dst", []>, TB;
let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", []>, TB;

def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;

def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", []>,
               OpSize16, Requires<[Not64BitMode]>;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), "push{l}\t{%cs|cs}", []>,
               OpSize32, Requires<[Not64BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), "push{w}\t{%ss|ss}", []>,
               OpSize16, Requires<[Not64BitMode]>;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), "push{l}\t{%ss|ss}", []>,
               OpSize32, Requires<[Not64BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), "push{w}\t{%ds|ds}", []>,
               OpSize16, Requires<[Not64BitMode]>;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), "push{l}\t{%ds|ds}", []>,
               OpSize32, Requires<[Not64BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins), "push{w}\t{%es|es}", []>,
               OpSize16, Requires<[Not64BitMode]>;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins), "push{l}\t{%es|es}", []>,
               OpSize32, Requires<[Not64BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", []>,
               OpSize16, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), "push{l}\t{%fs|fs}", []>, TB,
               OpSize32, Requires<[Not64BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", []>,
               OpSize16, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), "push{l}\t{%gs|gs}", []>, TB,
               OpSize32, Requires<[Not64BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t{%fs|fs}", []>, TB,
               OpSize32, Requires<[In64BitMode]>;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), "push{q}\t{%gs|gs}", []>, TB,
               OpSize32, Requires<[In64BitMode]>;

// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", []>,
              OpSize16, Requires<[Not64BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", []>,
              OpSize32, Requires<[Not64BitMode]>;

def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", []>,
              OpSize16, Requires<[Not64BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", []>,
              OpSize32, Requires<[Not64BitMode]>;

def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", []>,
              OpSize16, Requires<[Not64BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", []>,
              OpSize32, Requires<[Not64BitMode]>;

def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", []>,
              OpSize16, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins), "pop{l}\t{%fs|fs}", []>, TB,
              OpSize32, Requires<[Not64BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins), "pop{q}\t{%fs|fs}", []>, TB,
              OpSize32, Requires<[In64BitMode]>;

def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", []>,
              OpSize16, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins), "pop{l}\t{%gs|gs}", []>, TB,
              OpSize32, Requires<[Not64BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins), "pop{q}\t{%gs|gs}", []>, TB,
              OpSize32, Requires<[In64BitMode]>;

def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
                "lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
              Requires<[Not64BitMode]>;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
                "lds{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
              Requires<[Not64BitMode]>;

def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
                "lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
                "lss{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
                 "lss{q}\t{$src, $dst|$dst, $src}", []>, TB;

def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
                "les{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
              Requires<[Not64BitMode]>;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
                "les{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
              Requires<[Not64BitMode]>;

def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
                "lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
                "lfs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
                 "lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;

def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
                "lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
                "lgs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;

def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
                 "lgs\t{$src, $dst|$dst, $src}", []>, TB;

def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in {
def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
}
} // SchedRW

//===----------------------------------------------------------------------===//
// Descriptor-table support instructions

let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SGDT32m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>;
def SGDT64m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SIDT16m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SIDT32m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>;
def SIDT64m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
                "sldt{w}\t$dst", []>, TB, OpSize16;
let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
                "sldt{w}\t$dst", []>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
                "sldt{l}\t$dst", []>, OpSize32, TB;

// LLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
                 "sldt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;

def LGDT16m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
                "lgdt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LGDT32m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
                "lgdt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LGDT64m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
                "lgdt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
                "lidt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
                "lidt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LIDT64m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
                "lidt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
                "lldt{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
                "lldt{w}\t$src", []>, TB, NotMemoryFoldable;
} // SchedRW

//===----------------------------------------------------------------------===//
// Specialized register support
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
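
// Illustrative note (implied by the Uses/Defs above): ECX selects the MSR
// index, and the 64-bit value travels split across EDX:EAX, both for wrmsr
// (registers to MSR) and rdmsr (MSR to registers).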

let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;

def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
                "smsw{w}\t$dst", []>, OpSize16, TB;
def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
                "smsw{l}\t$dst", []>, OpSize32, TB;
// no m form encodable; use SMSW16m
def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
                 "smsw{q}\t$dst", []>, TB;

// For memory operands, there is only a 16-bit form
def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
                "smsw{w}\t$dst", []>, TB;

def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
                "lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
                "lmsw{w}\t$src", []>, TB, NotMemoryFoldable;

let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
} // SchedRW

//===----------------------------------------------------------------------===//
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB;

// wbnoinvd is like wbinvd, except without invalidation
// encoding: like wbinvd + an 0xF3 prefix
def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd",
                 [(int_x86_wbnoinvd)]>, XS,
               Requires<[HasWBNOINVD]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// CET instructions
// Use with caution, availability is not predicated on features.
let SchedRW = [WriteSystem] in {
  let Uses = [SSP] in {
    let Defs = [SSP] in {
      def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
                      [(int_x86_incsspd GR32:$src)]>, XS;
      def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src",
                       [(int_x86_incsspq GR64:$src)]>, XS;
    } // Defs SSP

    let Constraints = "$src = $dst" in {
      def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src),
                     "rdsspd\t$dst",
                     [(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS;
      def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src),
                      "rdsspq\t$dst",
                      [(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS;
    }

    let Defs = [SSP] in {
      def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp",
                          [(int_x86_saveprevssp)]>, XS;
      def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src),
                       "rstorssp\t$src",
                       [(int_x86_rstorssp addr:$src)]>, XS;
    } // Defs SSP
  } // Uses SSP

  def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                "wrssd\t{$src, $dst|$dst, $src}",
                [(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS;
  def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                 "wrssq\t{$src, $dst|$dst, $src}",
                 [(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS;
  def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "wrussd\t{$src, $dst|$dst, $src}",
                 [(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD;
  def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                  "wrussq\t{$src, $dst|$dst, $src}",
                  [(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD;

  let Defs = [SSP] in {
    let Uses = [SSP] in {
      def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy",
                       [(int_x86_setssbsy)]>, XS;
    } // Uses SSP

    def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src),
                     "clrssbsy\t$src",
                     [(int_x86_clrssbsy addr:$src)]>, XS;
  } // Defs SSP
} // SchedRW

let SchedRW = [WriteSystem] in {
  def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS;
  def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS;
} // SchedRW
|
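
// Illustrative shadow-stack sequence built from the CET definitions above
// (a sketch only; real code should verify CET support via CPUID first):
//
//   rdsspq  %rax    // read the current shadow-stack pointer into RAX
//   incsspq %rdx    // advance SSP past entries, count taken from RDX
//
// ENDBR32/ENDBR64 are the indirect-branch-tracking landing pads; they decode
// as NOPs on older parts, which is consistent with them carrying no
// Requires<> predicate here.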

//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
  def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;

let Uses = [EDX, EAX, ECX] in
  def XSETBV : I<0x01, MRM_D1, (outs), (ins),
                 "xsetbv",
                 [(int_x86_xsetbv ECX, EDX, EAX)]>, TB;

} // HasXSAVE

let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
              "xsave\t$dst",
              [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
                 "xsave64\t$dst",
                 [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
              "xrstor\t$dst",
              [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
                  "xrstor64\t$dst",
                  [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
                 "xsaveopt\t$dst",
                 [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
                    "xsaveopt64\t$dst",
                    [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
              "xsavec\t$dst",
              [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
                  "xsavec64\t$dst",
                  [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
              "xsaves\t$dst",
              [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
                  "xsaves64\t$dst",
                  [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVE, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
                "xrstors\t$dst",
                [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
                   "xrstors64\t$dst",
                   [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
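
// The whole XSAVE family takes its requested-feature bitmap implicitly in
// EDX:EAX (hence the `Uses = [EDX, EAX]` above), and XGETBV/XSETBV select the
// extended control register through ECX. A sketch of the usual save idiom:
//
//   xor %ecx, %ecx    // select XCR0
//   xgetbv            // EDX:EAX = enabled state-component mask
//   xsave (%rdi)      // save the components selected by EDX:EAX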

//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
  def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;

def : InstAlias<"xstorerng", (XSTORE)>;

let SchedRW = [WriteSystem] in {
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
  def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB;
  def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB;
  def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB;
  def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB;
  def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB;
}

let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
  def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB;
  def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB;
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
  def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
} // SchedRW

/*
//==-----------------------------------------------------------------------===//
// PKU - enable protection key
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
  def WRPKRU : PseudoI<(outs), (ins GR32:$src),
                       [(int_x86_wrpkru GR32:$src)]>;
  def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
                       [(set GR32:$dst, (int_x86_rdpkru))]>;
}
*/

let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
  def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
let Uses = [EAX, ECX, EDX] in
  def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
} // SchedRW

//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
  def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
                   "rdfsbase{l}\t$dst",
                   [(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS;
  def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
                      "rdfsbase{q}\t$dst",
                      [(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS;
  def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
                   "rdgsbase{l}\t$dst",
                   [(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS;
  def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
                      "rdgsbase{q}\t$dst",
                      [(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS;
  def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
                   "wrfsbase{l}\t$src",
                   [(int_x86_wrfsbase_32 GR32:$src)]>, XS;
  def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
                      "wrfsbase{q}\t$src",
                      [(int_x86_wrfsbase_64 GR64:$src)]>, XS;
  def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
                   "wrgsbase{l}\t$src",
                   [(int_x86_wrgsbase_32 GR32:$src)]>, XS;
  def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
                      "wrgsbase{q}\t$src",
                      [(int_x86_wrgsbase_64 GR64:$src)]>, XS;
}
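
// With CR4.FSGSBASE enabled by the OS, these give user code direct access to
// the FS/GS base MSRs, e.g. (sketch) swapping in a new thread-local base:
//
//   rdgsbase %rax    // save the old GS base
//   wrgsbase %rdi    // install the new one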

//===----------------------------------------------------------------------===//
// INVPCID Instruction
let SchedRW = [WriteSystem] in {
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                  "invpcid\t{$src2, $src1|$src1, $src2}",
                  [(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD,
                  Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                  "invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
                  Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW

let Predicates = [In64BitMode, HasINVPCID] in {
  // In 64-bit mode the instruction can only take a 64-bit register as its
  // register argument, while the intrinsic only accepts the corresponding
  // 32-bit argument.
  // The accepted values are currently just 0, 1, 2 and 3 anyway (see Intel
  // SDM -- INVPCID type), so it doesn't hurt us that one can't supply a
  // 64-bit value here.
  def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
            (INVPCID64
              (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
              addr:$src2)>;
}
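
// In the pattern above, MOV32rr implicitly zeroes the upper 32 bits of its
// destination and SUBREG_TO_REG records that fact, so the 32-bit intrinsic
// operand reaches INVPCID64 as a properly zero-extended 64-bit register.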


//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
  def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
  def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}

//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
  def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
} // Uses, Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// TS flag control instruction.
let SchedRW = [WriteSystem] in {
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
}

//===----------------------------------------------------------------------===//
// IF (inside EFLAGS) management instructions.
let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
}

//===----------------------------------------------------------------------===//
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
                "rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS,
                Requires<[Not64BitMode, HasRDPID]>;
def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS,
              Requires<[In64BitMode, HasRDPID]>;
} // SchedRW

let Predicates = [In64BitMode, HasRDPID] in {
  // The instruction is defined to output a 64-bit register in 64-bit mode,
  // so we have to compensate by extracting the 32-bit result the intrinsic
  // expects.
  def : Pat<(int_x86_rdpid),
            (EXTRACT_SUBREG (RDPID64), sub_32bit)>;
}

//===----------------------------------------------------------------------===//
// PTWRITE Instruction - Write Data to a Processor Trace Packet
let SchedRW = [WriteSystem] in {
def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst),
                "ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS,
                Requires<[HasPTWRITE]>;
def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst),
                    "ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS,
                    Requires<[In64BitMode, HasPTWRITE]>;

def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst),
                 "ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS,
                 Requires<[HasPTWRITE]>;
def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
                    "ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS,
                    Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// Platform Configuration instruction

// From ISA docs:
//  "This instruction is used to execute functions for configuring platform
//   features.
//   EAX: Leaf function to be invoked.
//   RBX/RCX/RDX: Leaf-specific purpose."
//  "Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF,
//   AF, OF, and SF are cleared. In case of failure, the failure reason is
//   indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared."
// Thus all these mentioned registers are considered clobbered.

let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
  def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
                Requires<[HasPCONFIG]>;
} // SchedRW
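
// At the time of writing the only documented PCONFIG leaf appears to be
// EAX = 0 (MKTME_KEY_PROGRAM, with RBX pointing at the key programming
// structure); the blanket clobber list above remains correct for any
// later leaves.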
60
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrTSX.td
vendored
Normal file
60
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrTSX.td
vendored
Normal file
@@ -0,0 +1,60 @@

//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel TSX instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TSX instructions

def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
                     [SDNPHasChain, SDNPSideEffect]>;

let SchedRW = [WriteSystem] in {

//let usesCustomInserter = 1 in
//def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
//               "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
//               Requires<[HasRTM]>;

let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
                         "xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
                         "xbegin\t$dst", []>, OpSize32;
}

// Pseudo instruction to fake the definition of EAX on the fallback code path.
//let isPseudo = 1, Defs = [EAX] in {
//def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
//}

def XEND : I<0x01, MRM_D5, (outs), (ins),
             "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;

let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
              "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;

def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
                 "xabort\t$imm",
                 [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW

// HLE prefixes
let SchedRW = [WriteSystem] in {

let isAsmParserOnly = 1 in {
def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
}

} // SchedRW
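
// Illustrative RTM usage matching the definitions above (a sketch only; the
// fallback path must be able to run without TSX):
//
//   xbegin fallback    // XBEGIN_4: on abort, EAX holds the abort status
//   ...                // transactional region
//   xend               // commit
//   ...
// fallback:
//   ...                // non-transactional path; may inspect EAX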
88
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrVMX.td
vendored
Normal file
88
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrVMX.td
vendored
Normal file
@@ -0,0 +1,88 @@

//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
// set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VMX instructions

let SchedRW = [WriteSystem] in {
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                 "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
                 Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                 "invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
                 Requires<[In64BitMode]>;

// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
                  "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
                  Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
                  "invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
                  Requires<[In64BitMode]>;

// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
                 "vmclear\t$vmcs", []>, PD;

// 0F 01 D4
def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;

// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;

// 0F 01 C3
def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
                 "vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
                 "vmptrst\t$vmcs", []>, PS;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
                   "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
                   NotMemoryFoldable;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
                   "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
                   NotMemoryFoldable;

let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                   "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
                   NotMemoryFoldable;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                   "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
                   NotMemoryFoldable;
} // mayStore

def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                    "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
                    NotMemoryFoldable;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                    "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
                    NotMemoryFoldable;

let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                    "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
                    NotMemoryFoldable;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                    "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
                    NotMemoryFoldable;
} // mayLoad

// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
              "vmxon\t$vmxon", []>, XS;
} // SchedRW
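
// Note the operand roles above: VMREAD copies a VMCS field out to a register
// or memory destination (MRMDestReg/MRMDestMem), VMWRITE reads its value from
// a register or memory source (MRMSrcReg/MRMSrcMem), and in both cases the
// remaining register operand carries the VMCS field encoding.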
511
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrVecCompiler.td
vendored
Normal file
511
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrVecCompiler.td
vendored
Normal file
@@ -0,0 +1,511 @@

//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// No op bitconverts
//===----------------------------------------------------------------------===//

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;

// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;

//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//

// Patterns for insert_subvector/extract_subvector to/from index=0
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
                                     RegisterClass RC, ValueType VT,
                                     SubRegIndex subIdx> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}
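
// For illustration, the first defm below expands to roughly:
//
//   def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
//             (v4i32 (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
//   def : Pat<(v8i32 (insert_subvector undef, VR128:$src, (iPTR 0))),
//             (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128:$src, sub_xmm))>;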

// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8,  sub_xmm>;

// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64,  sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8,  sub_xmm>;

// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32,  VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32,  VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64,  VR512, v8i64,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64,  VR512, v8f64,  sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8,  VR512, v64i8,  sub_ymm>;

multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
                                    RegisterClass RC, ValueType DstTy,
                                    ValueType SrcTy, SubRegIndex SubIdx> {
  def : Pat<(alignedstore (DstTy (extract_subvector
                                  (SrcTy RC:$src), (iPTR 0))), addr:$dst),
            (!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
             (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;

  def : Pat<(store (DstTy (extract_subvector
                           (SrcTy RC:$src), (iPTR 0))), addr:$dst),
            (!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
             (DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
}
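
// The !cast lookup assembles the real instruction name from the string
// parameters: with AlignedStr = "APD", "VMOV"#AlignedStr#"mr" resolves to
// VMOVAPDmr, so aligned stores pick the aligned move while generic stores
// fall through to the unaligned VMOVUPDmr form.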

let Predicates = [HasAVX, NoVLX] in {
  defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
  defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
  defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
}

let Predicates = [HasVLX] in {
  // Special patterns for storing subvector extracts of lower 128-bits
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
                                  sub_xmm>;
  defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
                                  sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
                                  v4i64, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
                                  v8i32, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
                                  v16i16, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
                                  v32i8, sub_xmm>;

  // Special patterns for storing subvector extracts of lower 128-bits of 512.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
                                  sub_xmm>;
  defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
                                  sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
                                  v8i64, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
                                  v16i32, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
                                  v32i16, sub_xmm>;
  defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
                                  v64i8, sub_xmm>;

  // Special patterns for storing subvector extracts of lower 256-bits of 512.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
                                  sub_ymm>;
  defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
                                  sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
                                  v8i64, sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
                                  v16i32, sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
                                  v32i16, sub_ymm>;
  defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
                                  v64i8, sub_ymm>;
}

// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
                                RegisterClass RC, ValueType DstTy,
                                ValueType SrcTy, ValueType ZeroTy,
                                SubRegIndex SubIdx> {
  def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
                                     (SrcTy RC:$src), (iPTR 0))),
            (SUBREG_TO_REG (i64 0),
             (SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}
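
// E.g. inserting a v2f64 into an all-zeros v4f64 becomes a single 128-bit
// VMOVAPDrr; a VEX-encoded XMM move already zeroes the upper lanes, and
// SUBREG_TO_REG records that guarantee so no extra zeroing is emitted.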

let Predicates = [HasAVX, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}

let Predicates = [HasVLX] in {
  defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;

  defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;

  defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}

let Predicates = [HasAVX512, NoVLX] in {
  defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
  defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;

  defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
  defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}

class maskzeroupper<ValueType vt, RegisterClass RC> :
  PatLeaf<(vt RC:$src), [{
    return isMaskZeroExtended(N);
  }]>;

def maskzeroupperv1i1  : maskzeroupper<v1i1,  VK1>;
def maskzeroupperv2i1  : maskzeroupper<v2i1,  VK2>;
def maskzeroupperv4i1  : maskzeroupper<v4i1,  VK4>;
def maskzeroupperv8i1  : maskzeroupper<v8i1,  VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;

// The patterns determine if we can depend on the upper bits of a mask register
// being zeroed by the previous operation so that we can skip explicit
// zeroing.
let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv16i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK16:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv32i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK32:$src, VK64)>;
}

let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv8i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK8:$src, VK16)>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv1i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK1:$src, VK8)>;
}

let Predicates = [HasVLX, HasDQI] in {
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK8)>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK8)>;
}

let Predicates = [HasVLX] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK16)>;
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK16)>;
}

let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK32)>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK32)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv2i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK2:$src, VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     maskzeroupperv4i1:$src, (iPTR 0))),
            (COPY_TO_REGCLASS VK4:$src, VK64)>;
}

// If the bits are not zero we have to fall back to explicitly zeroing by
// using shifts.
let Predicates = [HasAVX512] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
                                    (i8 15)), (i8 15))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
                                    (i8 14)), (i8 14))>;

  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
                                    (i8 12)), (i8 12))>;
}
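
// The shift counts are simply (mask register width - subvector width):
// e.g. widening v1i1 into VK16 shifts left then right by 16 - 1 = 15,
// which clears bits 15:1 while preserving bit 0.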

let Predicates = [HasAVX512, NoDQI] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
                                    (i8 8)), (i8 8))>;
}

let Predicates = [HasDQI] in {
  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;

  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
                                    (i8 7)), (i8 7))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
                                    (i8 6)), (i8 6))>;
  def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                    (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
                                    (i8 4)), (i8 4))>;
}

let Predicates = [HasBWI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v16i1 VK16:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v32i1 VK32:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}

let Predicates = [HasBWI, NoDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
                                    (i8 24)), (i8 24))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
                                    (i8 56)), (i8 56))>;
}

let Predicates = [HasBWI, HasDQI] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v8i1 VK8:$mask), (iPTR 0))),
            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}

let Predicates = [HasBWI, HasVLX] in {
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
                                    (i8 31)), (i8 31))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
                                    (i8 30)), (i8 30))>;
  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
                                    (i8 28)), (i8 28))>;

  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v1i1 VK1:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
                                    (i8 63)), (i8 63))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
                                    (i8 62)), (i8 62))>;
  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                     (v4i1 VK4:$mask), (iPTR 0))),
            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
                                    (i8 60)), (i8 60))>;
}
446
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrXOP.td
vendored
Normal file
446
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86InstrXOP.td
vendored
Normal file
@@ -0,0 +1,446 @@

//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes XOP (eXtended OPerations)
//
//===----------------------------------------------------------------------===//

multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
                Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}
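
// Each defm below therefore yields a register and a memory form, e.g.
// VPHSUBWDrr and VPHSUBWDrm, with the memory operand loaded as v2i64 and
// bitconverted to whatever element type the intrinsic expects.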

let ExeDomain = SSEPackedInt in {
  defm VPHSUBWD  : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
  defm VPHSUBDQ  : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
  defm VPHSUBBW  : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
  defm VPHADDWQ  : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
  defm VPHADDWD  : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
  defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
  defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
  defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
  defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
  defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
  defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
  defm VPHADDDQ  : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
  defm VPHADDBW  : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
  defm VPHADDBQ  : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
  defm VPHADDBD  : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
}

// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     Operand memop, ComplexPattern mem_cpat,
                     X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
                Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     PatFrag memop, X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
                Sched<[sched.Folded, ReadAfterLd]>;
}

multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
                     PatFrag memop, X86FoldableSchedWrite sched> {
  def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
  def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
                 Sched<[sched.Folded, ReadAfterLd]>;
}

let ExeDomain = SSEPackedSingle in {
  defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
                           ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
  defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
                           SchedWriteFRnd.XMM>;
  defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
                           SchedWriteFRnd.YMM>;
}

let ExeDomain = SSEPackedDouble in {
  defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
                           sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
  defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
                           SchedWriteFRnd.XMM>;
  defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
                           SchedWriteFRnd.YMM>;
}

multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  ValueType vt128, X86FoldableSchedWrite sched> {
  def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128:$dst,
                      (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
                XOP, Sched<[sched]>;
  def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, i128mem:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128:$dst,
                      (vt128 (OpNode (vt128 VR128:$src1),
                                     (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
                XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
  def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
                (ins i128mem:$src1, VR128:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR128:$dst,
                      (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
                                     (vt128 VR128:$src2))))]>,
                XOP, Sched<[sched.Folded, ReadAfterLd]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    []>,
                    XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}

let ExeDomain = SSEPackedInt in {
  defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
  defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
  defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
  defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
  defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
  defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}

multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                     ValueType vt128, X86FoldableSchedWrite sched> {
  def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
                  (ins VR128:$src1, u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst,
                        (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
                  XOP, Sched<[sched]>;
  def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
                  (ins i128mem:$src1, u8imm:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR128:$dst,
                        (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
                  XOP, Sched<[sched.Folded, ReadAfterLd]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
                          SchedWriteVecShiftImm.XMM>;
  defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
                          SchedWriteVecShiftImm.XMM>;
}

// Instruction where second source can be memory, but third must be register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
                    X86FoldableSchedWrite sched> {
  let isCommutable = 1 in
  def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2, VR128:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set VR128:$dst,
                           (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
                     Sched<[sched]>;
  def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
                     (ins VR128:$src1, i128mem:$src2, VR128:$src3),
                     !strconcat(OpcodeStr,
                                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     [(set VR128:$dst,
                           (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
                                VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
}

let ExeDomain = SSEPackedInt in {
  defm VPMADCSWD  : xop4opm2<0xB6, "vpmadcswd",
                             int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
  defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
                             int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
  defm VPMACSWW   : xop4opm2<0x95, "vpmacsww",
                             int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
  defm VPMACSWD   : xop4opm2<0x96, "vpmacswd",
                             int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
  defm VPMACSSWW  : xop4opm2<0x85, "vpmacssww",
                             int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
  defm VPMACSSWD  : xop4opm2<0x86, "vpmacsswd",
                             int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
  defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
                             int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
  defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
                             int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
  defm VPMACSSDD  : xop4opm2<0x8E, "vpmacssdd",
                             int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
  defm VPMACSDQL  : xop4opm2<0x97, "vpmacsdql",
                             int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
  defm VPMACSDQH  : xop4opm2<0x9F, "vpmacsdqh",
                             int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
  defm VPMACSDD   : xop4opm2<0x9E, "vpmacsdd",
                             int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}

// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
  def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
                        (v8i16 VR128:$src3))),
            (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
                        (v4i32 VR128:$src3))),
            (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
                                   (bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
                        (v2i64 VR128:$src3))),
            (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
                        (v2i64 VR128:$src3))),
            (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
  def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
                        (v4i32 VR128:$src3))),
            (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}
||||
|
||||
// Transforms to swizzle an immediate to help matching memory operand in first
|
||||
// operand.
|
||||
def CommuteVPCOMCC : SDNodeXForm<imm, [{
|
||||
uint8_t Imm = N->getZExtValue() & 0x7;
|
||||
Imm = X86::getSwappedVPCOMImm(Imm);
|
||||
return getI8Imm(Imm, SDLoc(N));
|
||||
}]>;
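// Illustrative note (editor's addition, not part of the original source):
// getSwappedVPCOMImm maps a VPCOM condition code to the one with its operands
// reversed, e.g. lt <-> gt and le <-> ge, while eq, neq, false and true map
// to themselves. This lets the commuted pattern in xopvpcom below match a
// comparison whose memory operand appears in the first position by selecting
// the "mi" form with the swapped immediate.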

// Instruction where second source can be memory, third must be imm8
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
                    X86FoldableSchedWrite sched> {
  let ExeDomain = SSEPackedInt in { // SSE integer instructions
    let isCommutable = 1 in
    def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, XOPCC:$cc),
                    !strconcat("vpcom${cc}", Suffix,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR128:$dst,
                       (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                                      imm:$cc)))]>,
                    XOP_4V, Sched<[sched]>;
    def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
                    !strconcat("vpcom${cc}", Suffix,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR128:$dst,
                       (vt128 (OpNode (vt128 VR128:$src1),
                                      (vt128 (bitconvert (loadv2i64 addr:$src2))),
                                      imm:$cc)))]>,
                    XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
    let isAsmParserOnly = 1, hasSideEffects = 0 in {
      def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                          !strconcat("vpcom", Suffix,
                                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                          []>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
      let mayLoad = 1 in
      def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
                          (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                          !strconcat("vpcom", Suffix,
                                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                          []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
                          NotMemoryFoldable;
    }
  }

  def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
                    (vt128 VR128:$src1), imm:$cc),
            (!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
             (CommuteVPCOMCC imm:$cc))>;
}

defm VPCOMB  : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMW  : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMD  : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMQ  : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>;
defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>;
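
// Illustrative note (editor's addition): the XOPCC operand is the 3-bit
// condition code of the VPCOM family. Assuming the usual XOP encoding
// (lt=0, le=1, gt=2, ge=3, eq=4, neq=5, false=6, true=7), the asm parser
// expands e.g. "vpcomltb %xmm2, %xmm1, %xmm0" to the alt form
// "vpcomb $0, %xmm2, %xmm1, %xmm0".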

multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
                  ValueType vt128, X86FoldableSchedWrite sched> {
  def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, VR128:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set VR128:$dst,
                         (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                                        (vt128 VR128:$src3))))]>,
                      XOP_4V, Sched<[sched]>;
  def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2, i128mem:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set VR128:$dst,
                         (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
                                        (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
                      XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
  def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, i128mem:$src2, VR128:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set VR128:$dst,
                         (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
                                        (vt128 VR128:$src3))))]>,
                      XOP_4V, Sched<[sched.Folded, ReadAfterLd,
                                     // i128mem:$src2
                                     ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                                     ReadDefault,
                                     // VR128:$src3
                                     ReadAfterLd]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
                          (ins VR128:$src1, VR128:$src2, VR128:$src3),
                          !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                          []>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
}

let ExeDomain = SSEPackedInt in {
  defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8,
                       SchedWriteVarShuffle.XMM>;
}

// Instruction where either second or third source can be memory
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                      X86MemOperand x86memop, ValueType VT,
                      X86FoldableSchedWrite sched> {
  def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, RC:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
                                             (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
                      Sched<[sched]>;
  def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, x86memop:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
                                             (X86andnp (load addr:$src3), RC:$src2))))]>,
                      XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
  def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, RC:$src3),
                      !strconcat(OpcodeStr,
                      "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                      [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
                                             (X86andnp RC:$src3, (load addr:$src2)))))]>,
                      XOP_4V, Sched<[sched.Folded, ReadAfterLd,
                                     // x86memop:$src2
                                     ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                                     ReadDefault,
                                     // RC:$src3
                                     ReadAfterLd]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
                          (ins RC:$src1, RC:$src2, RC:$src3),
                          !strconcat(OpcodeStr,
                          "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                          []>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
}

let ExeDomain = SSEPackedInt in {
  defm VPCMOV  : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64,
                            SchedWriteShuffle.XMM>;
  defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64,
                            SchedWriteShuffle.YMM>, VEX_L;
}
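
// Illustrative note (editor's addition): the (or (and ...), (X86andnp ...))
// pattern above is a bitwise select, since X86andnp(a, b) computes ~a & b.
// For VPCMOV each result bit is therefore
//   dst = (src1 & src3) | (src2 & ~src3);
// i.e. bits set in $src3 choose from $src1 and cleared bits choose from $src2.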

multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
                        X86MemOperand intmemop, X86MemOperand fpmemop,
                        ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
                        X86FoldableSchedWrite sched> {
  def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
                 !strconcat(OpcodeStr,
                 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
                 [(set RC:$dst,
                    (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
                 Sched<[sched]>;
  def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
                 !strconcat(OpcodeStr,
                 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
                 [(set RC:$dst,
                    (VT (X86vpermil2 RC:$src1, RC:$src2,
                                     (bitconvert (IntLdFrag addr:$src3)),
                                     (i8 imm:$src4))))]>, VEX_W,
                 Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
  def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
                 !strconcat(OpcodeStr,
                 "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
                 [(set RC:$dst,
                    (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
                                     RC:$src3, (i8 imm:$src4))))]>,
                 Sched<[sched.Folded, ReadAfterLd,
                        // fpmemop:$src2
                        ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
                        // RC:$src3
                        ReadAfterLd]>;
  // For disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
                     (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
                     !strconcat(OpcodeStr,
                     "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
                     []>, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}

let ExeDomain = SSEPackedDouble in {
  defm VPERMIL2PD  : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
                                  v2f64, loadv2f64, loadv2i64,
                                  SchedWriteFVarShuffle.XMM>;
  defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
                                  v4f64, loadv4f64, loadv4i64,
                                  SchedWriteFVarShuffle.YMM>, VEX_L;
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMIL2PS  : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
                                  v4f32, loadv4f32, loadv2i64,
                                  SchedWriteFVarShuffle.XMM>;
  defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
                                  v8f32, loadv8f32, loadv4i64,
                                  SchedWriteFVarShuffle.YMM>, VEX_L;
}

77
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86PfmCounters.td
vendored
Normal file
@@ -0,0 +1,77 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the available hardware counters for various subtargets.
//
//===----------------------------------------------------------------------===//

let SchedModel = SandyBridgeModel in {
def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
def SBPort23Counter : PfmIssueCounter<SBPort23,
                                      ["uops_dispatched_port:port_2",
                                       "uops_dispatched_port:port_3"]>;
def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
}

let SchedModel = HaswellModel in {
def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
}

let SchedModel = BroadwellModel in {
def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
}

let SchedModel = SkylakeClientModel in {
def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
}

let SchedModel = SkylakeServerModel in {
def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
}

let SchedModel = BtVer2Model in {
def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
def JFPU0Counter  : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
def JFPU1Counter  : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
}
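
// Illustrative note (editor's addition): each PfmIssueCounter ties a
// ProcResource of the enclosing scheduling model to one or more libpfm event
// names, so tooling such as llvm-exegesis can check the model against real
// hardware counters; PfmCycleCounter names the cycle event for the model.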
17
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86RegisterBanks.td
vendored
Normal file
@@ -0,0 +1,17 @@
//=- X86RegisterBanks.td - Describe the X86 Banks ---------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

/// General Purpose Registers: RAX, RCX,...
def GPRRegBank : RegisterBank<"GPR", [GR64]>;

/// Floating Point/Vector Registers
def VECRRegBank : RegisterBank<"VECR", [VR512]>;
591
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86RegisterInfo.td
vendored
Normal file
@@ -0,0 +1,591 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
// aliases between the registers, and the register classes built out of the
// registers.
//
//===----------------------------------------------------------------------===//

class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
  let Namespace = "X86";
  let HWEncoding = Enc;
  let SubRegs = subregs;
}

// Subregister indices.
let Namespace = "X86" in {
  def sub_8bit          : SubRegIndex<8>;
  def sub_8bit_hi       : SubRegIndex<8, 8>;
  def sub_8bit_hi_phony : SubRegIndex<8, 8>;
  def sub_16bit         : SubRegIndex<16>;
  def sub_16bit_hi      : SubRegIndex<16, 16>;
  def sub_32bit         : SubRegIndex<32>;
  def sub_xmm           : SubRegIndex<128>;
  def sub_ymm           : SubRegIndex<256>;
}
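
// Illustrative note (editor's addition): SubRegIndex<Size, Offset> names a
// bit range inside a super-register. In the AX definition further below, AL
// occupies sub_8bit (bits 0-7) and AH occupies sub_8bit_hi (bits 8-15, i.e.
// size 8 at offset 8).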

//===----------------------------------------------------------------------===//
//  Register definitions...
//

// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).

// Dwarf numbering is different for 32-bit and 64-bit, and there are
// variations by target as well. Currently the first entry is for X86-64, the
// second is for exception handling on X86-32/Darwin, and the third is the
// 'generic' one (X86-32/Linux and debug information on X86-32/Darwin).

// 8-bit registers
// Low registers
def AL : X86Reg<"al", 0>;
def DL : X86Reg<"dl", 2>;
def CL : X86Reg<"cl", 1>;
def BL : X86Reg<"bl", 3>;

// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
def AH : X86Reg<"ah", 4>;
def DH : X86Reg<"dh", 6>;
def CH : X86Reg<"ch", 5>;
def BH : X86Reg<"bh", 7>;

// X86-64 only, requires REX.
let CostPerUse = 1 in {
def SIL  : X86Reg<"sil", 6>;
def DIL  : X86Reg<"dil", 7>;
def BPL  : X86Reg<"bpl", 5>;
def SPL  : X86Reg<"spl", 4>;
def R8B  : X86Reg<"r8b", 8>;
def R9B  : X86Reg<"r9b", 9>;
def R10B : X86Reg<"r10b", 10>;
def R11B : X86Reg<"r11b", 11>;
def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
}

let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
def SIH   : X86Reg<"", -1>;
def DIH   : X86Reg<"", -1>;
def BPH   : X86Reg<"", -1>;
def SPH   : X86Reg<"", -1>;
def R8BH  : X86Reg<"", -1>;
def R9BH  : X86Reg<"", -1>;
def R10BH : X86Reg<"", -1>;
def R11BH : X86Reg<"", -1>;
def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
// High word of the low 32 bits of the super-register:
def HAX   : X86Reg<"", -1>;
def HDX   : X86Reg<"", -1>;
def HCX   : X86Reg<"", -1>;
def HBX   : X86Reg<"", -1>;
def HSI   : X86Reg<"", -1>;
def HDI   : X86Reg<"", -1>;
def HBP   : X86Reg<"", -1>;
def HSP   : X86Reg<"", -1>;
def HIP   : X86Reg<"", -1>;
def R8WH  : X86Reg<"", -1>;
def R9WH  : X86Reg<"", -1>;
def R10WH : X86Reg<"", -1>;
def R11WH : X86Reg<"", -1>;
def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
}

// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : X86Reg<"ax", 0, [AL,AH]>;
def DX : X86Reg<"dx", 2, [DL,DH]>;
def CX : X86Reg<"cx", 1, [CL,CH]>;
def BX : X86Reg<"bx", 3, [BL,BH]>;
}
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
def SI : X86Reg<"si", 6, [SIL,SIH]>;
def DI : X86Reg<"di", 7, [DIL,DIH]>;
def BP : X86Reg<"bp", 5, [BPL,BPH]>;
def SP : X86Reg<"sp", 4, [SPL,SPH]>;
}
def IP : X86Reg<"ip", 0>;

// X86-64 only, requires REX.
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CostPerUse = 1,
    CoveredBySubRegs = 1 in {
def R8W  : X86Reg<"r8w", 8, [R8B,R8BH]>;
def R9W  : X86Reg<"r9w", 9, [R9B,R9BH]>;
def R10W : X86Reg<"r10w", 10, [R10B,R10BH]>;
def R11W : X86Reg<"r11w", 11, [R11B,R11BH]>;
def R12W : X86Reg<"r12w", 12, [R12B,R12BH]>;
def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}

// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
def EAX : X86Reg<"eax", 0, [AX, HAX]>, DwarfRegNum<[-2, 0, 0]>;
def EDX : X86Reg<"edx", 2, [DX, HDX]>, DwarfRegNum<[-2, 2, 2]>;
def ECX : X86Reg<"ecx", 1, [CX, HCX]>, DwarfRegNum<[-2, 1, 1]>;
def EBX : X86Reg<"ebx", 3, [BX, HBX]>, DwarfRegNum<[-2, 3, 3]>;
def ESI : X86Reg<"esi", 6, [SI, HSI]>, DwarfRegNum<[-2, 6, 6]>;
def EDI : X86Reg<"edi", 7, [DI, HDI]>, DwarfRegNum<[-2, 7, 7]>;
def EBP : X86Reg<"ebp", 5, [BP, HBP]>, DwarfRegNum<[-2, 4, 5]>;
def ESP : X86Reg<"esp", 4, [SP, HSP]>, DwarfRegNum<[-2, 5, 4]>;
def EIP : X86Reg<"eip", 0, [IP, HIP]>, DwarfRegNum<[-2, 8, 8]>;
}

// X86-64 only, requires REX
let SubRegIndices = [sub_16bit, sub_16bit_hi], CostPerUse = 1,
    CoveredBySubRegs = 1 in {
def R8D  : X86Reg<"r8d", 8, [R8W,R8WH]>;
def R9D  : X86Reg<"r9d", 9, [R9W,R9WH]>;
def R10D : X86Reg<"r10d", 10, [R10W,R10WH]>;
def R11D : X86Reg<"r11d", 11, [R11W,R11WH]>;
def R12D : X86Reg<"r12d", 12, [R12W,R12WH]>;
def R13D : X86Reg<"r13d", 13, [R13W,R13WH]>;
def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}

// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;

// These also require REX.
let CostPerUse = 1 in {
def R8  : X86Reg<"r8",  8, [R8D]>,   DwarfRegNum<[ 8, -2, -2]>;
def R9  : X86Reg<"r9",  9, [R9D]>,   DwarfRegNum<[ 9, -2, -2]>;
def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>,   DwarfRegNum<[16, -2, -2]>;
}}

// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;

// Pseudo Floating Point registers
def FP0 : X86Reg<"fp0", 0>;
def FP1 : X86Reg<"fp1", 0>;
def FP2 : X86Reg<"fp2", 0>;
def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
def FP7 : X86Reg<"fp7", 0>;

// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;

// X86-64 only
let CostPerUse = 1 in {
def XMM8:  X86Reg<"xmm8", 8>,   DwarfRegNum<[25, -2, -2]>;
def XMM9:  X86Reg<"xmm9", 9>,   DwarfRegNum<[26, -2, -2]>;
def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;

def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;

} // CostPerUse

// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
  foreach Index = 0-31 in {
    def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
  }
}

// ZMM Registers, used by AVX-512 instructions.
let SubRegIndices = [sub_ymm] in {
  foreach Index = 0-31 in {
    def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
  }
}

// Mask Registers, used by AVX-512 instructions.
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118,  93,  93]>;
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119,  94,  94]>;
def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120,  95,  95]>;
def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121,  96,  96]>;
def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122,  97,  97]>;
def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123,  98,  98]>;
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124,  99,  99]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;

// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;

// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;

// Status flags register.
//
// Note that some flags that are commonly thought of as part of the status
// flags register are modeled separately. Typically this is due to instructions
// reading and updating those flags independently of all the others. We don't
// want to create false dependencies between these instructions and so we use
// a separate register to model them.
def EFLAGS : X86Reg<"flags", 0>;

// The direction flag.
def DF : X86Reg<"dirflag", 0>;


// Segment registers
def CS : X86Reg<"cs", 1>;
def DS : X86Reg<"ds", 3>;
def SS : X86Reg<"ss", 2>;
def ES : X86Reg<"es", 0>;
def FS : X86Reg<"fs", 4>;
def GS : X86Reg<"gs", 5>;

// Debug registers
def DR0  : X86Reg<"dr0",  0>;
def DR1  : X86Reg<"dr1",  1>;
def DR2  : X86Reg<"dr2",  2>;
def DR3  : X86Reg<"dr3",  3>;
def DR4  : X86Reg<"dr4",  4>;
def DR5  : X86Reg<"dr5",  5>;
def DR6  : X86Reg<"dr6",  6>;
def DR7  : X86Reg<"dr7",  7>;
def DR8  : X86Reg<"dr8",  8>;
def DR9  : X86Reg<"dr9",  9>;
def DR10 : X86Reg<"dr10", 10>;
def DR11 : X86Reg<"dr11", 11>;
def DR12 : X86Reg<"dr12", 12>;
def DR13 : X86Reg<"dr13", 13>;
def DR14 : X86Reg<"dr14", 14>;
def DR15 : X86Reg<"dr15", 15>;

// Control registers
def CR0  : X86Reg<"cr0",  0>;
def CR1  : X86Reg<"cr1",  1>;
def CR2  : X86Reg<"cr2",  2>;
def CR3  : X86Reg<"cr3",  3>;
def CR4  : X86Reg<"cr4",  4>;
def CR5  : X86Reg<"cr5",  5>;
def CR6  : X86Reg<"cr6",  6>;
def CR7  : X86Reg<"cr7",  7>;
def CR8  : X86Reg<"cr8",  8>;
def CR9  : X86Reg<"cr9",  9>;
def CR10 : X86Reg<"cr10", 10>;
def CR11 : X86Reg<"cr11", 11>;
def CR12 : X86Reg<"cr12", 12>;
def CR13 : X86Reg<"cr13", 13>;
def CR14 : X86Reg<"cr14", 14>;
def CR15 : X86Reg<"cr15", 15>;

// Pseudo index registers
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;

// Bound registers, used in MPX instructions
def BND0 : X86Reg<"bnd0", 0>;
def BND1 : X86Reg<"bnd1", 1>;
def BND2 : X86Reg<"bnd2", 2>;
def BND3 : X86Reg<"bnd3", 3>;

// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;

//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
//

// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
// 64-bit mode. The main complication is that they cannot be encoded in an
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
                        (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
                             R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
  let AltOrders = [(sub GR8, AH, BH, CH, DH)];
  let AltOrderSelect = [{
    return MF.getSubtarget<X86Subtarget>().is64Bit();
  }];
}
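
// Illustrative note (editor's addition): AltOrders/AltOrderSelect switch the
// allocation order per function. When AltOrderSelect returns true (64-bit
// subtarget here), the allocator uses GR8 minus AH/BH/CH/DH, since those
// h-registers cannot appear in instructions that need a REX prefix.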

let isAllocatable = 0 in
def GRH8 : RegisterClass<"X86", [i8], 8,
                         (add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
                              R12BH, R13BH, R14BH, R15BH)>;

def GR16 : RegisterClass<"X86", [i16], 16,
                         (add AX, CX, DX, SI, DI, BX, BP, SP,
                              R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;

let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
                          (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
                               R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
                               R15WH)>;

def GR32 : RegisterClass<"X86", [i32], 32,
                         (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
                              R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;

// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
                         (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                              RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;

// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;

// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 15)>;

// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;

// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
                                               R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
                                                  R8, R9, R10, R11, RIP)>;

// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
                              (add AL, CL, DL, AH, CH, DH, BL, BH)> {
  let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
  let AltOrderSelect = [{
    return MF.getSubtarget<X86Subtarget>().is64Bit();
  }];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
                               (add AX, CX, DX, SI, DI, BX, BP, SP)>;
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
                               (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
                               (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;

// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;

// GR64_NOSP - GR64 registers except RSP (and RIP).
def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;

// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
                                    (and GR32_NOREX, GR32_NOSP)>;

// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
                                    (and GR64_NOREX, GR64_NOSP)>;

// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86_64 ISA.

// In such cases, it is fine to use RIP as we are sure the 32 high
// bits are not set. We do not need variants for NOSP as RIP is not
// allowed there.
// RIP is not spilled anywhere for now, so stick to 32-bit alignment
// to save on memory space.
// FIXME: We could allow all 64bit registers, but we would need
// something to check that the 32 high bits are not set,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;

// When RBP is used as a base pointer in a 32-bit addressing environment, it
// is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to 32-bit alignment to save
// on memory consumption.
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
                                          (add LOW32_ADDR_ACCESS, RBP)>;

// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;

// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;

def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;


// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
// values as 64-bit quantities instead of 80-bit quantities, which is much
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.

def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;

// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
  let isAllocatable = 0;
}

// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
                          128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
                          256, (sequence "YMM%u", 0, 15)>;

// Special classes that help the assembly parser choose some alternate
// instructions to favor 2-byte VEX encodings.
def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
                           128, (sequence "XMM%u", 0, 7)>;
def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
                           128, (sequence "XMM%u", 8, 15)>;
def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
                           256, (sequence "YMM%u", 0, 7)>;
def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
                           256, (sequence "YMM%u", 8, 15)>;

// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
  let CopyCost = -1;  // Don't allow copying of status registers.
  let isAllocatable = 0;
}
def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
  let CopyCost = -1;  // Don't allow copying of status registers.
  let isAllocatable = 0;
}
def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
  let CopyCost = -1;  // Don't allow copying of status registers.
  let isAllocatable = 0;
}

// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
                          512, (sequence "ZMM%u", 0, 31)>;

// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;

def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;

// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
                           128, (add FR32X)>;
def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
                           256, (sequence "YMM%u", 0, 31)>;

// Mask registers
def VK1  : RegisterClass<"X86", [v1i1],  16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2  : RegisterClass<"X86", [v2i1],  16, (add VK1)> {let Size = 16;}
def VK4  : RegisterClass<"X86", [v4i1],  16, (add VK2)> {let Size = 16;}
def VK8  : RegisterClass<"X86", [v8i1],  16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}

def VK1WM  : RegisterClass<"X86", [v1i1],  16, (sub VK1, K0)> {let Size = 16;}
def VK2WM  : RegisterClass<"X86", [v2i1],  16, (sub VK2, K0)> {let Size = 16;}
def VK4WM  : RegisterClass<"X86", [v4i1],  16, (sub VK4, K0)> {let Size = 16;}
def VK8WM  : RegisterClass<"X86", [v8i1],  16, (sub VK8, K0)> {let Size = 16;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}

// Bound registers
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
1692
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86SchedBroadwell.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1975
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86SchedHaswell.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
49
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86SchedPredicates.td
vendored
Normal file
@@ -0,0 +1,49 @@
//===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines scheduling predicate definitions that are common to
// all X86 subtargets.
//
//===----------------------------------------------------------------------===//

// A predicate used to identify dependency-breaking instructions that clear the
// content of the destination register. Note that this predicate only checks if
// input registers are the same. This predicate doesn't make any assumptions on
// the expected instruction opcodes, because different processors may implement
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
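
// Illustrative note (editor's addition): a typical zero-idiom is
// "xorl %eax, %eax" (XOR32rr), whose two source operands - machine operands
// 1 and 2 after the destination - are the same register, so the result is
// zero regardless of the register's previous value.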

// A predicate used to check if an instruction is a LEA, and if it uses all
// three source operands: base, index, and offset.
def IsThreeOperandsLEAPredicate: CheckAll<[
  CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,

  // isRegOperand(Base)
  CheckIsRegOperand<1>,
  CheckNot<CheckInvalidRegOperand<1>>,

  // isRegOperand(Index)
  CheckIsRegOperand<3>,
  CheckNot<CheckInvalidRegOperand<3>>,

  // hasLEAOffset(Offset)
  CheckAny<[
    CheckAll<[
      CheckIsImmOperand<4>,
      CheckNot<CheckZeroOperand<4>>
    ]>,
    CheckNonPortable<"MI.getOperand(4).isGlobal()">
  ]>
]>;

// This predicate evaluates to true only if the input machine instruction is a
// 3-operands LEA. Tablegen automatically generates a new method for it in
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
    TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
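
// Illustrative note (editor's addition): for LEA, the memory reference after
// the destination expands to (base, scale, index, disp, segment), i.e.
// machine operands 1 through 5; hence the checks on operands 1, 3 and 4
// above. "leal 4(%rdi,%rsi), %eax" satisfies the predicate, while
// "leal (%rdi), %eax" does not (no index, zero displacement).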
1159
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86SchedSandyBridge.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1850
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86SchedSkylakeClient.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2580
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86SchedSkylakeServer.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
661
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86Schedule.td
vendored
Normal file
@@ -0,0 +1,661 @@
//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// InstrSchedModel annotations for out-of-order CPUs.

// Instructions with folded loads need to read the memory operand immediately,
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;

// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;

// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
                       list<ProcResourceKind> ExePorts,
                       int Lat, list<int> Res, int UOps> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}
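
// Illustrative usage (editor's addition; the port names here are hypothetical
// placeholders, not taken from the original source):
//   defm : X86WriteRes<WriteStore, [SomePortAGU, SomePortSTD], 1, [1, 1], 2>;
// would give WriteStore a latency of 1 cycle, one resource cycle on each
// listed port, and two micro-ops in the enclosing scheduling model.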

// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: with and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
// with a folded load.
class X86FoldableSchedWrite : SchedWrite {
  // The SchedWrite to use when a load is folded into the instruction.
  SchedWrite Folded;
}

// Multiclass that produces a linked pair of SchedWrites.
multiclass X86SchedWritePair {
  // Register-Memory operation.
  def Ld : SchedWrite;
  // Register-Register operation.
  def NAME : X86FoldableSchedWrite {
    let Folded = !cast<SchedWrite>(NAME#"Ld");
  }
}
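
// Illustrative note (editor's addition): "defm WriteALU : X86SchedWritePair;"
// below defines both WriteALU (register form) and WriteALULd (load-folded
// form), with WriteALU.Folded pointing at WriteALULd. WriteALURMW then chains
// WriteALULd with WriteStore for read-modify-write instructions.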
|
||||
|
||||
// Helpers to mark SchedWrites as unsupported.
|
||||
multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
|
||||
let Unsupported = 1 in {
|
||||
def : WriteRes<SchedRW, []>;
|
||||
}
|
||||
}
|
||||
multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
|
||||
let Unsupported = 1 in {
|
||||
def : WriteRes<SchedRW, []>;
|
||||
def : WriteRes<SchedRW.Folded, []>;
|
||||
}
|
||||
}
|
||||
|
||||
// Multiclass that wraps X86FoldableSchedWrite for each vector width.
|
||||
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
|
||||
X86FoldableSchedWrite s128,
|
||||
X86FoldableSchedWrite s256,
|
||||
X86FoldableSchedWrite s512> {
|
||||
X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
|
||||
X86FoldableSchedWrite MMX = sScl; // MMX operations.
|
||||
X86FoldableSchedWrite XMM = s128; // XMM operations.
|
||||
X86FoldableSchedWrite YMM = s256; // YMM operations.
|
||||
X86FoldableSchedWrite ZMM = s512; // ZMM operations.
|
||||
}
|
||||
|
||||
// Multiclass that wraps X86SchedWriteWidths for each fp vector type.
|
||||
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
|
||||
X86SchedWriteWidths sPD> {
|
||||
X86SchedWriteWidths PS = sPS;
|
||||
X86SchedWriteWidths PD = sPD;
|
||||
}
|
||||
|
||||
// Multiclass that wraps move/load/store triple for a vector width.
|
||||
class X86SchedWriteMoveLS<SchedWrite MoveRR,
|
||||
SchedWrite LoadRM,
|
||||
SchedWrite StoreMR> {
|
||||
SchedWrite RR = MoveRR;
|
||||
SchedWrite RM = LoadRM;
|
||||
SchedWrite MR = StoreMR;
|
||||
}
|
||||
|
||||
// Multiclass that wraps X86SchedWriteMoveLS for each vector width.
|
||||
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
|
||||
X86SchedWriteMoveLS s128,
|
||||
X86SchedWriteMoveLS s256,
|
||||
X86SchedWriteMoveLS s512> {
|
||||
X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
|
||||
X86SchedWriteMoveLS MMX = sScl; // MMX operations.
|
||||
X86SchedWriteMoveLS XMM = s128; // XMM operations.
|
||||
X86SchedWriteMoveLS YMM = s256; // YMM operations.
|
||||
X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
|
||||
}
|
||||
|
||||
// Loads, stores, and moves, not folded with other operations.
|
||||
def WriteLoad : SchedWrite;
|
||||
def WriteStore : SchedWrite;
|
||||
def WriteStoreNT : SchedWrite;
|
||||
def WriteMove : SchedWrite;
|
||||
|
||||
// Arithmetic.
|
||||
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
|
||||
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
|
||||
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
|
||||
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.

def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.

// Integer division.
defm WriteDiv8 : X86SchedWritePair;
defm WriteDiv16 : X86SchedWritePair;
defm WriteDiv32 : X86SchedWritePair;
defm WriteDiv64 : X86SchedWritePair;
defm WriteIDiv8 : X86SchedWritePair;
defm WriteIDiv16 : X86SchedWritePair;
defm WriteIDiv32 : X86SchedWritePair;
defm WriteIDiv64 : X86SchedWritePair;

defm WriteBSF : X86SchedWritePair; // Bit scan forward.
defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV : X86SchedWritePair; // Conditional move.
defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support

// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;

// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;

// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def WriteZero : SchedWrite;

// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm WriteJump : X86SchedWritePair;

// Floating point. This covers both scalar and vector operations.
def WriteFLD0 : SchedWrite;
def WriteFLD1 : SchedWrite;
def WriteFLDC : SchedWrite;
def WriteFLoad : SchedWrite;
def WriteFLoadX : SchedWrite;
def WriteFLoadY : SchedWrite;
def WriteFMaskedLoad : SchedWrite;
def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;

defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).

// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }

// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair;
defm WriteFHAddZ : X86SchedWritePair;
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddX : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair;
defm WritePHAddZ : X86SchedWritePair;

// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
def WriteVecLoadNT : SchedWrite;
def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;

defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.

// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.

// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;
def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;

// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).

defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).

defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).

defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).

defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).

defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).

defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).

def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).

// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;

// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair;

// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.

// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;

// EMMS/FEMMS
def WriteEMMS : SchedWrite;

// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;

// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;

// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).

// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;

// Fence instructions.
def WriteFence : SchedWrite;

// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;

// Move/Load/Store wrappers.
def WriteFMoveLS
  : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
def WriteFMoveLSX
  : X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
  : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
  : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
                              WriteFMoveLSY, WriteFMoveLSY>;

def WriteFMoveLSNT
  : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
def WriteFMoveLSNTX
  : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
  : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
def SchedWriteFMoveLSNT
  : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
                              WriteFMoveLSNTY, WriteFMoveLSNTY>;

def WriteVecMoveLS
  : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
  : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
  : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
  : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
                              WriteVecMoveLSY, WriteVecMoveLSY>;

def WriteVecMoveLSNT
  : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTX
  : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
  : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
def SchedWriteVecMoveLSNT
  : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
                              WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
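
// Editorial note (an inference from the argument order above, not stated in
// the file): the fourth X86SchedWriteMoveLSWidths operand is the 512-bit
// slot, so passing the YMM wrapper twice makes ZMM moves reuse the YMM
// costs; no separate 512-bit move/load/store writes are defined here.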

// Vector width wrappers.
def SchedWriteFAdd
  : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
  : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
  : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
  : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
  : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
  : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
  : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
  : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
  : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
  : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
  : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
  : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
  : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
                        WriteFSqrtY, WriteFSqrtZ>;
def SchedWriteFSqrt64
  : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
                        WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
  : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
  : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
  : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
  : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
  : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;

def SchedWriteFShuffle
  : X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
                        WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
  : X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
                        WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
  : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
  : X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
                        WriteFVarBlendY, WriteFVarBlendZ>;

def SchedWriteCvtDQ2PD
  : X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
                        WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
  : X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
                        WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
  : X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
                        WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
  : X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
                        WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
  : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
                        WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
  : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
                        WriteCvtPD2PSY, WriteCvtPD2PSZ>;

def SchedWriteVecALU
  : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
  : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
  : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
                        WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
  : X86SchedWriteWidths<WriteVecTest, WriteVecTest,
                        WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
  : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
                        WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
  : X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
                        WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
  : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
                        WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
  : X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
                        WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
  : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
                        WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
  : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
                        WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
  : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
                        WritePSADBWY, WritePSADBWZ>;

def SchedWriteShuffle
  : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
                        WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
  : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
                        WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
  : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
  : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
                        WriteVarBlendY, WriteVarBlendZ>;

// Vector size wrappers.
def SchedWriteFAddSizes
  : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
  : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
  : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
  : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
  : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
  : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
  : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
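
// Editorial sketch (assumes the X86SchedWriteSizes/X86SchedWriteWidths helper
// classes expose PS/PD and Scl/XMM/YMM/ZMM members, as in the LLVM X86
// backend this file mirrors): instruction patterns can then pick a write by
// element size and vector width, e.g. a packed-single 128-bit add would use
// something like:
//
//   ... Sched<[SchedWriteFAddSizes.PS.XMM]> ...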

//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.

// IssueWidth is analogous to the number of decode units. Core and its
// descendants, including Nehalem and SandyBridge, have 4 decoders.
// Resources beyond the decoder operate on micro-ops and are buffered
// so adjacent micro-ops don't directly compete.
//
// MicroOpBufferSize > 1 indicates that RAW dependencies can be
// decoded in the same cycle. The value 32 is a somewhat arbitrary bound
// on the number of in-flight instructions.
//
// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
// indicates high latency opcodes. Alternatively, InstrItinData
// entries may be included here to define specific operand
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
// The GenericX86Model contains no instruction schedules
// and disables PostRAScheduler.
class GenericX86Model : SchedMachineModel {
  let IssueWidth = 4;
  let MicroOpBufferSize = 32;
  let LoadLatency = 4;
  let HighLatency = 10;
  let PostRAScheduler = 0;
  let CompleteModel = 0;
}

def GenericModel : GenericX86Model;
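
// Editorial note (illustrative; the CPU bindings live outside this file): a
// subtarget definition elsewhere in the backend attaches a model to a CPU
// name, e.g.
//
//   def : ProcessorModel<"generic", GenericModel, []>;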

// Define a model with the PostRAScheduler enabled.
def GenericPostRAModel : GenericX86Model {
  let PostRAScheduler = 1;
}
917
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86ScheduleAtom.td
vendored
Normal file
@@ -0,0 +1,917 @@
//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the schedule class data for the in-order Intel Atom
// (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//

//
// Scheduling information derived from the "Intel 64 and IA32 Architectures
// Optimization Reference Manual", Chapter 13, Section 4.

// Atom machine model.
def AtomModel : SchedMachineModel {
  let IssueWidth = 2;        // Allows 2 instructions per scheduling group.
  let MicroOpBufferSize = 0; // In-order execution, always hide latency.
  let LoadLatency = 3;       // Expected cycles, may be overridden.
  let HighLatency = 30;      // Expected, may be overridden.

  // On the Atom, the throughput for taken branches is 2 cycles. For small
  // simple loops, expand by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;
  let PostRAScheduler = 1;
  let CompleteModel = 0;
}

let SchedModel = AtomModel in {

// Functional Units
def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
                                 // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
                                 // SIMD/FP: SIMD ALU, FP Adder

def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;

// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
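
// Editorial example (not in the upstream file): with this ReadAdvance, a
// folded-load instruction's register sources may be produced up to 3 cycles
// after the instruction issues without the model charging a stall, since the
// load itself occupies those cycles.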

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> RRPorts,
                            list<ProcResourceKind> RMPorts,
                            int RRLat = 1, int RMLat = 1,
                            list<int> RRRes = [1],
                            list<int> RMRes = [1]> {
  // Register variant: RRLat cycles on the RRPorts.
  def : WriteRes<SchedRW, RRPorts> {
    let Latency = RRLat;
    let ResourceCycles = RRRes;
  }

  // Memory (folded-load) variant: RMLat cycles on the RMPorts.
  def : WriteRes<SchedRW.Folded, RMPorts> {
    let Latency = RMLat;
    let ResourceCycles = RMRes;
  }
}
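
// Editorial illustration (assuming the folded variant of an
// X86SchedWritePair is the corresponding "...Ld" write, per X86Schedule.td):
// the WriteIMul instantiation below behaves as if one had written
//
//   def : WriteRes<WriteIMul,   [AtomPort01]> { let Latency = 7; let ResourceCycles = [7]; }
//   def : WriteRes<WriteIMulLd, [AtomPort01]> { let Latency = 7; let ResourceCycles = [7]; }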

// A folded store needs a cycle on Port0 for the store data.
def : WriteRes<WriteRMW, [AtomPort0]>;

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;

defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;

defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;

defm : X86WriteResPairUnsupported<WriteCRC32>;

defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.

def : WriteRes<WriteSETCC, [AtomPort01]>;
def : WriteRes<WriteSETCCStore, [AtomPort01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteBitTest,[AtomPort01]>;

defm : X86WriteResUnsupported<WriteIMulH>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;

def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
  let Latency = 8;
  let ResourceCycles = [8];
}
def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
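
// (InstRW entries such as the one above override the default SchedWrite
// mapping for just the listed opcodes within this model.)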

def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
  let Latency = 6;
  let ResourceCycles = [6];
}
def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;

def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
  let Latency = 14;
  let ResourceCycles = [14];
}
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
                                         IMUL64rmi8, IMUL64rmi32)>;

// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;

// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;

defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;

////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteLoad, [AtomPort0]>;
def : WriteRes<WriteStore, [AtomPort0]>;
def : WriteRes<WriteStoreNT, [AtomPort0]>;
def : WriteRes<WriteMove, [AtomPort01]>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, []>;
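
// (The empty resource list above lets the zero idiom retire without claiming
// any execution port in this model.)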

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteFence, [AtomPort0]>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [AtomPort01]>;

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteRes<WriteFLD0, [AtomPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [AtomPort01], 6, [6], 1>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFLoadY>;
defm : X86WriteResUnsupported<WriteFMaskedLoad>;
defm : X86WriteResUnsupported<WriteFMaskedLoadY>;

def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
defm : X86WriteResUnsupported<WriteFMaskedStore>;
defm : X86WriteResUnsupported<WriteFMaskedStoreY>;

def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;

defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;

defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////

defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadY>;
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadNTY>;
defm : X86WriteResUnsupported<WriteVecMaskedLoad>;
defm : X86WriteResUnsupported<WriteVecMaskedLoadY>;

def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;

def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;

defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : X86WriteResPairUnsupported<WritePMULLD>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : X86WriteResPairUnsupported<WritePHMINPOS>;
defm : X86WriteResPairUnsupported<WriteMPSAD>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
def : WriteRes<WriteVecExtract, [AtomPort0]>;
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteResPairUnsupported<WritePCmpIStrI>;
defm : X86WriteResPairUnsupported<WritePCmpIStrM>;
defm : X86WriteResPairUnsupported<WritePCmpEStrI>;
defm : X86WriteResPairUnsupported<WritePCmpEStrM>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }

////////////////////////////////////////////////////////////////////////////////
// AES instructions.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteResPairUnsupported<WriteAESIMC>;
defm : X86WriteResPairUnsupported<WriteAESKeyGen>;
defm : X86WriteResPairUnsupported<WriteAESDecEnc>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteResPairUnsupported<WriteCLMul>;

////////////////////////////////////////////////////////////////////////////////
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }

////////////////////////////////////////////////////////////////////////////////
// Special Cases.
////////////////////////////////////////////////////////////////////////////////

// Port0
def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
  let Latency = 1;
  let ResourceCycles = [1];
}
def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr,
                                     MOVSX64rr32)>;
def : SchedAlias<WriteALURMW, AtomWrite0_1>;
def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
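
// (SchedAlias remaps one scheduling class to another within this model: the
// generic ALU/ADC read-modify-write sequences above resolve to the
// single-cycle Port0 class AtomWrite0_1.)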
|
||||
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
|
||||
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
|
||||
|
||||
def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
|
||||
let Latency = 5;
|
||||
let ResourceCycles = [5];
|
||||
}
|
||||
def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
|
||||
|
||||
// Port1
|
||||
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
|
||||
let Latency = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
|
||||
def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
|
||||
"BT(C|R|S)?(16|32|64)(rr|ri8)")>;
|
||||
|
||||
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
|
||||
let Latency = 5;
|
||||
let ResourceCycles = [5];
|
||||
}
|
||||
def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
|
||||
MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
|
||||
|
||||
// Port0 and Port1
|
||||
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
|
||||
let Latency = 1;
|
||||
let ResourceCycles = [1, 1];
|
||||
}
|
||||
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
|
||||
POP16rmr, POP32rmr, POP64rmr,
|
||||
PUSH16r, PUSH32r, PUSH64r,
|
||||
PUSHi16, PUSHi32,
|
||||
PUSH16rmr, PUSH32rmr, PUSH64rmr,
|
||||
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
|
||||
XCH_F)>;
|
||||
def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
|
||||
"IRET(16|32|64)?")>;
|
||||
|
||||
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
|
||||
let Latency = 5;
|
||||
let ResourceCycles = [5, 5];
|
||||
}
|
||||
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
|
||||
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
|
||||
|
||||
// Port0 or Port1
|
||||
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
|
||||
let Latency = 1;
|
||||
let ResourceCycles = [1];
|
||||
}
|
||||
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
|
||||
LFENCE,
|
||||
STOSB, STOSL, STOSQ, STOSW,
|
||||
MOVSSrr, MOVSSrr_REV,
|
||||
PSLLDQri, PSRLDQri)>;
|
||||
def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
|
||||
"MMX_PUNPCKH(BW|DQ|WD)irr")>;
|
||||
|
||||
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
|
||||
let Latency = 2;
|
||||
let ResourceCycles = [2];
|
||||
}
|
||||
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
|
||||
PUSH16rmm, PUSH32rmm, PUSH64rmm,
|
||||
LODSB, LODSL, LODSQ, LODSW,
|
||||
SCASB, SCASL, SCASQ, SCASW)>;
|
||||
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
|
||||
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
|
||||
"XADD(8|16|32|64)rr",
|
||||
"XCHG(8|16|32|64)(ar|rr)",
|
||||
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
|
||||
"MMX_P(ADD|SUB)Qirr",
|
||||
"MOV(S|Z)X16rr8",
|
||||
"MOV(UPS|UPD|DQU)mr",
|
||||
"MASKMOVDQU(64)?",
|
||||
"P(ADD|SUB)Qrr")>;

def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
  let Latency = 3;
  let ResourceCycles = [3];
}
def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
                                      CMPSB, CMPSL, CMPSQ, CMPSW,
                                      MOVSB, MOVSL, MOVSQ, MOVSW,
                                      POP16rmm, POP32rmm, POP64rmm)>;
def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
                                         "XCHG(8|16|32|64)rm",
                                         "PH(ADD|SUB)Drr",
                                         "MOV(S|Z)X16rm8",
                                         "MMX_P(ADD|SUB)Qirm",
                                         "MOV(UPS|UPD|DQU)rm",
                                         "P(ADD|SUB)Qrm")>;

def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
  let Latency = 4;
  let ResourceCycles = [4];
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
                                      JCXZ, JECXZ, JRCXZ,
                                      LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
                                         "(MMX_)?PEXTRWrr(_REV)?")>;

def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
  let Latency = 5;
  let ResourceCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;

def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
  let Latency = 6;
  let ResourceCycles = [6];
}
def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
                                      SHLD16rrCL, SHRD16rrCL,
                                      SHLD16rri8, SHRD16rri8,
                                      SHLD16mrCL, SHRD16mrCL,
                                      SHLD16mri8, SHRD16mri8)>;
def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
                                         "IST_F(P)?(16|32|64)?m",
                                         "MMX_PH(ADD|SUB)S?Wrm")>;

def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
  let Latency = 7;
  let ResourceCycles = [7];
}
def : InstRW<[AtomWrite01_7], (instrs AAD8i8)>;

def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
  let Latency = 8;
  let ResourceCycles = [8];
}
def : InstRW<[AtomWrite01_8], (instrs LOOPE,
                                      PUSHA16, PUSHA32,
                                      SHLD64rrCL, SHRD64rrCL,
                                      FNSTCW16m)>;

def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
  let Latency = 9;
  let ResourceCycles = [9];
}
def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
                                      POPA16, POPA32,
                                      PUSHF16, PUSHF32, PUSHF64,
                                      SHLD64mrCL, SHRD64mrCL,
                                      SHLD64mri8, SHRD64mri8,
                                      SHLD64rri8, SHRD64rri8,
                                      CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
                                         "(U)?COMIS(D|S)rr",
                                         "CVT(T)?SS2SI64rr(_Int)?")>;

def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
  let Latency = 10;
  let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
                                          "CVT(T)?SS2SI64rm(_Int)?")>;

def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
  let Latency = 11;
  let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;

def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
  let Latency = 13;
  let ResourceCycles = [13];
}
def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;

def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
  let Latency = 14;
  let ResourceCycles = [14];
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;

def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
  let Latency = 15;
  let ResourceCycles = [15];
}
def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;

def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;

def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
  let Latency = 18;
  let ResourceCycles = [18];
}
def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;

def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
  let Latency = 20;
  let ResourceCycles = [20];
}
def : InstRW<[AtomWrite01_20], (instrs DAS)>;

def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
  let Latency = 21;
  let ResourceCycles = [21];
}
def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;

def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
  let Latency = 22;
  let ResourceCycles = [22];
}
def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;

def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
  let Latency = 23;
  let ResourceCycles = [23];
}
def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;

def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
  let Latency = 25;
  let ResourceCycles = [25];
}
def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;

def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
  let Latency = 26;
  let ResourceCycles = [26];
}
def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;

def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
  let Latency = 29;
  let ResourceCycles = [29];
}
def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;

def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
  let Latency = 30;
  let ResourceCycles = [30];
}
def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;

def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
  let Latency = 32;
  let ResourceCycles = [32];
}
def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;

def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
  let Latency = 45;
  let ResourceCycles = [45];
}
def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;

def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
  let Latency = 46;
  let ResourceCycles = [46];
}
def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;

def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
  let Latency = 48;
  let ResourceCycles = [48];
}
def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;

def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
  let Latency = 55;
  let ResourceCycles = [55];
}
def : InstRW<[AtomWrite01_55], (instrs FPREM)>;

def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
  let Latency = 59;
  let ResourceCycles = [59];
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;

def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
  let Latency = 63;
  let ResourceCycles = [63];
}
def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;

def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
  let Latency = 68;
  let ResourceCycles = [68];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;

def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
  let Latency = 71;
  let ResourceCycles = [71];
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
                                       INVLPG, INVLPGA32, INVLPGA64)>;

def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
  let Latency = 72;
  let ResourceCycles = [72];
}
def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;

def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
  let Latency = 74;
  let ResourceCycles = [74];
}
def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;

def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
  let Latency = 77;
  let ResourceCycles = [77];
}
def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;

def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
  let Latency = 78;
  let ResourceCycles = [78];
}
def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;

def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
  let Latency = 79;
  let ResourceCycles = [79];
}
def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
                                          "LRETI?(L|Q|W)")>;

def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
  let Latency = 92;
  let ResourceCycles = [92];
}
def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;

def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
  let Latency = 94;
  let ResourceCycles = [94];
}
def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;

def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
  let Latency = 99;
  let ResourceCycles = [99];
}
def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;

def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
  let Latency = 121;
  let ResourceCycles = [121];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;

def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
  let Latency = 127;
  let ResourceCycles = [127];
}
def : InstRW<[AtomWrite01_127], (instrs INT)>;

def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
  let Latency = 130;
  let ResourceCycles = [130];
}
def : InstRW<[AtomWrite01_130], (instrs INT3)>;

def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
  let Latency = 140;
  let ResourceCycles = [140];
}
def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;

def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
  let Latency = 141;
  let ResourceCycles = [141];
}
def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;

def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
  let Latency = 146;
  let ResourceCycles = [146];
}
def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;

def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
  let Latency = 147;
  let ResourceCycles = [147];
}
def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;

def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
  let Latency = 168;
  let ResourceCycles = [168];
}
def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;

def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
  let Latency = 174;
  let ResourceCycles = [174];
}
def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;

def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
  let Latency = 183;
  let ResourceCycles = [183];
}
def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;

def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
  let Latency = 202;
  let ResourceCycles = [202];
}
def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;

} // SchedModel
682
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86ScheduleBtVer2.td
vendored
Normal file
@@ -0,0 +1,682 @@
//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD btver2 (Jaguar) to support
// instruction scheduling and other instruction cost heuristics. Based on the
// AMD Software Optimization Guide for AMD Family 16h Processors and its
// instruction latency appendix.
//
//===----------------------------------------------------------------------===//

def BtVer2Model : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and btver2 can
  // decode 2 instructions per cycle.
  let IssueWidth = 2;
  let MicroOpBufferSize = 64; // Retire Control Unit
  let LoadLatency = 5; // FPU load latency (worst case; integer loads take 3 cycles)
  let HighLatency = 25;
  let MispredictPenalty = 14; // Minimum branch misdirection penalty
  let PostRAScheduler = 1;

  // FIXME: SSE4/AVX is unimplemented. This flag is set to allow
  // the scheduler to assign a default model to unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = BtVer2Model in {

// Jaguar can issue up to 6 micro-ops in one cycle
def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handle FP->INT jam)
def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM

// The Integer PRF for Jaguar is 64 entries, and it holds the architectural and
// speculative version of the 64-bit integer registers.
// Reference: www.realworldtech.com/jaguar/4/
//
// The processor always keeps the different parts of an integer register
// together. An instruction that writes to a part of a register will therefore
// have a false dependence on any previous write to the same register or any
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
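// Editor's note (illustrative, not part of the vendored file): because the
// PRF tracks whole registers, a partial write carries a false dependence on
// earlier in-flight writes to any part of the same register, e.g.:
//
//   mov $1, %ecx     ; renames all of %rcx
//   add %bl, %cl     ; the byte write still waits on the full %rcx rename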

// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
def JFpuPRF : RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
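// Editor's note (illustrative): the [1, 1, 2] cost list means each VR256
// register consumes two of the 72 entries, matching the cracking of 256-bit
// operations into two COPs (see, e.g., the 2-uop WriteFMoveY entry below).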

// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
// Reference: "Software Optimization Guide for AMD Family 16h Processors"
def JRCU : RetireControlUnit<64, 2>;

// Integer Pipe Scheduler
def JALU01 : ProcResGroup<[JALU0, JALU1]> {
  let BufferSize=20;
}

// AGU Pipe Scheduler
def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
  let BufferSize=12;
}

// Fpu Pipe Scheduler
def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
  let BufferSize=18;
}

// Functional units
def JDiv   : ProcResource<1>; // integer division
def JMul   : ProcResource<1>; // integer multiplication
def JVALU0 : ProcResource<1>; // vector integer
def JVALU1 : ProcResource<1>; // vector integer
def JVIMUL : ProcResource<1>; // vector integer multiplication
def JSTC   : ProcResource<1>; // vector store/convert
def JFPM   : ProcResource<1>; // FP multiplication
def JFPA   : ProcResource<1>; // FP addition

// Functional unit groups
def JFPX  : ProcResGroup<[JFPA, JFPM]>;
def JVALU : ProcResGroup<[JVALU0, JVALU1]>;

// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available
// until 3 cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
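// Editor's note (illustrative): for a load-op such as "add (%rdi), %eax",
// this means the %eax source is only read about 3 cycles after issue, so a
// producer of %eax can finish up to 3 cycles later without stalling it.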

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
                            int Lat, list<int> Res = [], int UOps = 1> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
  // latency.
  def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
    let Latency = !add(Lat, 3);
    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
    let NumMicroOps = UOps;
  }
}
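// Editor's note (illustrative sketch, assuming WriteALU's folded-load
// counterpart is named WriteALULd as in X86Schedule.td): a use such as
// "defm : JWriteResIntPair<WriteALU, [JALU01], 1>;" (see below) expands to
// roughly:
//
//   def : WriteRes<WriteALU,   [JALU01]>        { let Latency = 1; }
//   def : WriteRes<WriteALULd, [JLAGU, JALU01]> { let Latency = 4; }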

multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
                            int Lat, list<int> Res = [], int UOps = 1> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
  // latency.
  def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
    let Latency = !add(Lat, 5);
    let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
    let NumMicroOps = UOps;
  }
}

multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
                            int Lat, list<int> Res = [2], int UOps = 2> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
  // latency.
  def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
    let Latency = !add(Lat, 5);
    let ResourceCycles = !listconcat([2], Res);
    let NumMicroOps = UOps;
  }
}

// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;

////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResIntPair<WriteALU,    [JALU01], 1>;
defm : JWriteResIntPair<WriteADC,    [JALU01], 1, [2]>;
defm : JWriteResIntPair<WriteIMul,   [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;

defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;

defm : JWriteResIntPair<WriteDiv8,   [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16,  [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32,  [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteDiv64,  [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteIDiv8,  [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;

defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;

defm : JWriteResIntPair<WriteCMOV,  [JALU01], 1>; // Conditional move.
defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
def : WriteRes<WriteBitTest,[JALU01]>;

// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;

// Bit counts.
defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;

// BMI1 BEXTR, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
defm : X86WriteResPairUnsupported<WriteBZHI>;

////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResIntPair<WriteShift, [JALU01], 1>;

// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;

////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteLoad,    [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore,   [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove,    [JALU01]>;

// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteSTMXCSR, [JSAGU]>;

// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;

////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteZero, []>;

////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResIntPair<WriteJump, [JALU01], 1>;

////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteSystem,     [JALU01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;

// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }

////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteRes<WriteFLD0,          [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1,          [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC,          [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad,         [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX,        [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY,        [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad,   [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY,  [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;

defm : X86WriteRes<WriteFStore,        [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX,       [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY,       [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT,      [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX,     [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY,     [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore,  [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;

defm : X86WriteRes<WriteFMove,  [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;

defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;

defm : JWriteResFpuPair<WriteFAdd,    [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX,   [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY,   [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64,  [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp,    [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX,   [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY,   [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64,  [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom,    [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul,    [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX,   [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY,   [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64,  [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD,  [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS,  [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp,    [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX,   [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY,   [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt,  [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv,    [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX,   [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY,   [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : JWriteResFpuPair<WriteFDiv64,  [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : JWriteResFpuPair<WriteFSqrt,    [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSqrtX,   [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY,   [JFPU1, JFPM], 42, [2, 42], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : JWriteResFpuPair<WriteFSqrt64,  [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResFpuPair<WriteFSqrt64X, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResYMMPair<WriteFSqrt64Y, [JFPU1, JFPM], 54, [2, 54], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign,   [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd,    [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY,   [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic,  [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest,  [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle,  [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle,  [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend,  [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend,  [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;

////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResFpuPair<WriteCvtSS2I,  [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I,  [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I,  [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I,  [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS,  [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS,  [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD,  [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD,  [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : JWriteResFpuPair<WriteCvtSS2SD,  [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD,  [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;

defm : JWriteResFpuPair<WriteCvtSD2SS,  [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS,  [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

defm : JWriteResFpuPair<WriteCvtPH2PS,  [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;

defm : X86WriteRes<WriteCvtPS2PH,    [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY,   [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt,  [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;

////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteRes<WriteVecLoad,         [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX,        [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY,        [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT,       [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY,      [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad,   [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY,  [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;

defm : X86WriteRes<WriteVecStore,        [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX,       [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY,       [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT,      [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY,     [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore,  [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;

defm : X86WriteRes<WriteVecMove,        [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX,       [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY,       [JFPU01, JVALU], 1, [2, 2], 2>;
defm : X86WriteRes<WriteVecMoveToGpr,   [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;

defm : JWriteResFpuPair<WriteVecALU,      [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX,     [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift,    [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX,   [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : JWriteResFpuPair<WriteVecIMul,  [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW,  [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle,  [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle,  [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic,  [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest,  [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;

////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////

defm : X86WriteRes<WriteVecInsert,    [JFPU01, JVALU], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecInsertLd,  [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtract,   [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;

////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;

////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////

def : WriteRes<WriteFMOVMSK,   [JFPU0, JFPA, JALU0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }

////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResFpuPair<WriteAESIMC,    [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResFpuPair<WriteFHAdd,  [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd,  [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WritePHAddY>;

////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////

defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;

////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////

def JWriteINSERTQ : SchedWriteRes<[JFPU01, JVALU]> {
  let Latency = 2;
  let ResourceCycles = [1, 4];
}
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;

////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////

def JWriteVBROADCASTYLd : SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
  let Latency = 6;
  let ResourceCycles = [1, 2, 4];
  let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
                                                         VBROADCASTSSYrm)>;

def JWriteJVZEROALL : SchedWriteRes<[]> {
  let Latency = 90;
  let NumMicroOps = 73;
}
def : InstRW<[JWriteJVZEROALL], (instrs VZEROALL)>;

def JWriteJVZEROUPPER : SchedWriteRes<[]> {
  let Latency = 46;
  let NumMicroOps = 37;
}
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;

///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////

def JWriteZeroLatency : SchedWriteRes<[]> {
  let Latency = 0;
}

// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.
// Reference: Section 10.8 of the "Software Optimization Guide for AMD Family
// 15h Processors".
// Reference: Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// Section 21.8 [Dependency-breaking instructions].

def JWriteZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
                                        XOR32rr, XOR64rr)>;
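// Editor's note (illustrative): with the variant above, "xor %eax, %eax"
// (identical source registers) matches ZeroIdiomPredicate and completes with
// zero latency and no execution resources, while "xor %ebx, %eax" falls
// through to the ordinary WriteALU class.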

def JWriteFZeroIdiom : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
                                         ANDNPSrr, VANDNPSrr,
                                         ANDNPDrr, VANDNPDrr)>;

def JWriteVZeroIdiomLogic : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;

def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
                                               PANDNrr, VPANDNrr)>;

def JWriteVZeroIdiomALU : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
                                            MMX_PSUBQirr, MMX_PSUBWirr,
                                            MMX_PCMPGTBirr, MMX_PCMPGTDirr,
                                            MMX_PCMPGTWirr)>;

def JWriteVZeroIdiomALUX : SchedWriteVariant<[
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
  SchedVar<MCSchedPredicate<TruePred>,           [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
                                             PSUBDrr, VPSUBDrr,
                                             PSUBQrr, VPSUBQrr,
                                             PSUBWrr, VPSUBWrr,
                                             PCMPGTBrr, VPCMPGTBrr,
                                             PCMPGTDrr, VPCMPGTDrr,
                                             PCMPGTQrr, VPCMPGTQrr,
                                             PCMPGTWrr, VPCMPGTWrr)>;

// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
  let Latency = 2;
}

// On Jaguar, a slow LEA is either a 3Ops LEA (base, index, offset), or an LEA
// with a `Scale` value different than 1.
def JSlowLEAPredicate : MCSchedPredicate<
  CheckAny<[
    // A 3-operand LEA (base, index, offset).
    IsThreeOperandsLEAFn,
    // An LEA with a "Scale" different than 1.
    CheckAll<[
      CheckIsImmOperand<2>,
      CheckNot<CheckImmOperand<2, 1>>
    ]>
  ]>
>;

def JWriteLEA : SchedWriteVariant<[
  SchedVar<JSlowLEAPredicate,          [JWrite3OpsLEA]>,
  SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
]>;

def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
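// Editor's note (illustrative): under JSlowLEAPredicate,
//   lea (%rax), %rbx           -> fast form, plain WriteLEA
//   lea 8(%rax,%rcx,4), %rbx   -> base+index+offset with scale != 1, so it
//                                 takes the 2-cycle JWrite3OpsLEA path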

def JSlowLEA16r : SchedWriteRes<[JALU01]> {
  let Latency = 3;
  let ResourceCycles = [4];
}

def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;

} // SchedModel
486
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86ScheduleSLM.td
vendored
Normal file
@@ -0,0 +1,486 @@
//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

def SLMModel : SchedMachineModel {
  // All x86 instructions are modeled as a single micro-op, and SLM can decode 2
  // instructions per cycle.
  let IssueWidth = 2;
  let MicroOpBufferSize = 32; // Based on the reorder buffer.
  let LoadLatency = 3;
  let MispredictPenalty = 10;
  let PostRAScheduler = 1;

  // For small loops, expand by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;

  // FIXME: SSE4 is unimplemented. This flag is set to allow
  // the scheduler to assign a default model to unrecognized opcodes.
  let CompleteModel = 0;
}

let SchedModel = SLMModel in {

// Silvermont has 5 reservation stations for micro-ops
def SLM_IEC_RSV0 : ProcResource<1>;
def SLM_IEC_RSV1 : ProcResource<1>;
def SLM_FPC_RSV0 : ProcResource<1> { let BufferSize = 1; }
def SLM_FPC_RSV1 : ProcResource<1> { let BufferSize = 1; }
def SLM_MEC_RSV  : ProcResource<1>;

// Many micro-ops are capable of issuing on multiple ports.
def SLM_IEC_RSV01 : ProcResGroup<[SLM_IEC_RSV0, SLM_IEC_RSV1]>;
def SLM_FPC_RSV01 : ProcResGroup<[SLM_FPC_RSV0, SLM_FPC_RSV1]>;

def SLMDivider      : ProcResource<1>;
def SLMFPMultiplier : ProcResource<1>;
def SLMFPDivider    : ProcResource<1>;

// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;

// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
                           list<ProcResourceKind> ExePorts,
                           int Lat, list<int> Res = [1], int UOps = 1,
                           int LoadLat = 3> {
  // Register variant is using a single cycle on ExePort.
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }

  // Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
  // the latency (default = 3).
  def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
    let Latency = !add(Lat, LoadLat);
    let ResourceCycles = !listconcat([1], Res);
    let NumMicroOps = UOps;
  }
}
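// Editor's note (illustrative): unlike the Jaguar multiclasses above, this
// one takes the folded-load latency as a parameter (LoadLat, default 3); the
// divide pairs below pass LoadLat = 4, e.g.
// "defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;".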
|
||||
|
||||
// A folded store needs a cycle on MEC_RSV for the store data, but it does not
|
||||
// need an extra port cycle to recompute the address.
|
||||
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
|
||||
|
||||
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
|
||||
def : WriteRes<WriteZero, []>;
|
||||
|
||||
// Load/store MXCSR.
|
||||
// FIXME: These are probably wrong. They are copy pasted from WriteStore/Load.
|
||||
def : WriteRes<WriteSTMXCSR, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteLDMXCSR, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
|
||||
// Treat misc copies as a move.
|
||||
def : InstRW<[WriteMove], (instrs COPY)>;
|
||||
|
||||
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
|
||||
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
|
||||
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
|
||||
|
||||
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
|
||||
|
||||
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
|
||||
|
||||
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
|
||||
defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
|
||||
|
||||
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
|
||||
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
|
||||
|
||||
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
|
||||
defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
|
||||
defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
|
||||
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
|
||||
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
|
||||
// FIXME Latency and NumMicrOps?
|
||||
let ResourceCycles = [2,1];
|
||||
}
|
||||
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
|
||||
def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
|
||||
|
||||
// This is for simple LEAs with one or two input operands.
|
||||
// The complex ones can only execute on port 1, and they require two cycles on
|
||||
// the port to read all inputs. We don't model that.
|
||||
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
|
||||
|
||||
// Bit counts.
|
||||
defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
|
||||
defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
|
||||
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
|
||||
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
|
||||
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
|
||||
|
||||
// BMI1 BEXTR, BMI2 BZHI
|
||||
defm : X86WriteResPairUnsupported<WriteBEXTR>;
|
||||
defm : X86WriteResPairUnsupported<WriteBZHI>;
|
||||
|
||||
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
|
||||
|
||||
// Scalar and vector floating point.
|
||||
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFLD1, [SLM_FPC_RSV01], 1, [1], 1>;
|
||||
defm : X86WriteRes<WriteFLDC, [SLM_FPC_RSV01], 1, [2], 2>;
|
||||
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
|
||||
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
|
||||
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
|
||||
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
|
||||
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;

defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;

// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;

defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;

// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;

defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;

// Vector insert/extract operations.
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;

def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 4;
  let NumMicroOps = 2;
  let ResourceCycles = [1, 2];
}
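// ResourceCycles pairs up element-wise with the resource list, so the
// [1, 2] above means 1 cycle on SLM_FPC_RSV0 and 2 cycles on SLM_MEC_RSV
// for the extract-to-store case.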

////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;

// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SLM_FPC_RSV0]> {
  let Latency = 13;
  let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 13;
  let ResourceCycles = [13, 1];
}
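// Convention used throughout this string-compare section: the register
// form holds SLM_FPC_RSV0 for its full latency, and the *Ld form adds a
// single SLM_MEC_RSV cycle for the load while keeping the same latency.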

// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SLM_FPC_RSV0]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 17;
  let ResourceCycles = [17, 1];
}

// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SLM_FPC_RSV0]> {
  let Latency = 17;
  let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 17;
  let ResourceCycles = [17, 1];
}

// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SLM_FPC_RSV0]> {
  let Latency = 21;
  let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 21;
  let ResourceCycles = [21, 1];
}

// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSKY, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }

// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
  let Latency = 8;
  let ResourceCycles = [5];
}
def : WriteRes<WriteAESDecEncLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 8;
  let ResourceCycles = [5, 1];
}

def : WriteRes<WriteAESIMC, [SLM_FPC_RSV0]> {
  let Latency = 8;
  let ResourceCycles = [5];
}
def : WriteRes<WriteAESIMCLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 8;
  let ResourceCycles = [5, 1];
}

def : WriteRes<WriteAESKeyGen, [SLM_FPC_RSV0]> {
  let Latency = 8;
  let ResourceCycles = [5];
}
def : WriteRes<WriteAESKeyGenLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 8;
  let ResourceCycles = [5, 1];
}

// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SLM_FPC_RSV0]> {
  let Latency = 10;
  let ResourceCycles = [10];
}
def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
  let Latency = 10;
  let ResourceCycles = [10, 1];
}

def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence, [SLM_MEC_RSV]>;
def : WriteRes<WriteNop, []>;
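// Where no 'let' overrides appear, the WriteRes defaults from
// TargetSchedule.td apply (one uop, 1-cycle latency); WriteNop in
// addition consumes no scheduler resources at all (empty resource list).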

// AVX/FMA is not supported on this architecture, but we should define the
// basic scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;

defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
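// The Unsupported markers above most likely expand to empty WriteRes
// entries flagged Unsupported = 1 (see X86Schedule.td), keeping the model
// complete for CompleteModel verification without claiming any real
// resource usage -- roughly:
//
//   let Unsupported = 1 in
//   def : WriteRes<WriteFMA, []>;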

} // SchedModel
1544
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86ScheduleZnver1.td
vendored
Normal file
File diff suppressed because it is too large
Load Diff
459
thirdparty/capstone/suite/synctools/tablegen/X86/back/X86_reduce.td
vendored
Normal file
@@ -0,0 +1,459 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//

// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"

//===----------------------------------------------------------------------===//
// X86 Subtarget state
//

def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                                 "64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
                                 "32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
                                 "16-bit mode (i8086)">;

//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//

def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
                                  "Enable X87 float instructions">;

def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
                                   "Enable NOPL instruction">;

def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
                                   "Enable conditional move instructions">;

def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                     "Support POPCNT instruction">;

def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
                                   "Support fxsave/fxrstor instructions">;

def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
                                    "Support xsave instructions">;

def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
                                      "Support xsaveopt instructions">;

def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
                                     "Support xsavec instructions">;

def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
                                     "Support xsaves instructions">;

def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                   "Enable SSE instructions",
                                   // SSE codegen depends on cmovs, and all
                                   // SSE1+ processors support them.
                                   [FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                   "Enable SSE2 instructions",
                                   [FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
                                   "Enable SSE3 instructions",
                                   [FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
                                    "Enable SSSE3 instructions",
                                    [FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
                                    "Enable SSE 4.1 instructions",
                                    [FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                    "Enable SSE 4.2 instructions",
                                    [FeatureSSE41]>;
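// Because each SSE level lists its predecessor as an implied feature,
// enabling the top of the chain pulls in everything below it; e.g.
// (usage sketch) 'llc -mattr=+sse4.2' also enables SSE4.1, SSSE3, SSE3,
// SSE2, SSE1 and CMOV.
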
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
                                  "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
                                    "Enable 3DNow! instructions",
                                    [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
                                     "Enable 3DNow! Athlon instructions",
                                     [Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
                                    "Support 64-bit instructions",
                                    [FeatureCMOV]>;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
                                         "64-bit with cmpxchg16b",
                                         [Feature64Bit]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
                                       "SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
                                         "PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
                                          "IsUAMem16Slow", "true",
                                          "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
                                          "IsUAMem32Slow", "true",
                                          "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                    "Support SSE 4a instructions",
                                    [FeatureSSE3]>;

def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
                                  "Enable AVX instructions",
                                  [FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                   "Enable AVX2 instructions",
                                   [FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
                                  "Enable three-operand fused multiply-add",
                                  [FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
                                   "Support 16-bit floating point conversion instructions",
                                   [FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
                                     "Enable AVX-512 instructions",
                                     [FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
                                  "Enable AVX-512 Exponential and Reciprocal Instructions",
                                  [FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
                                  "Enable AVX-512 Conflict Detection Instructions",
                                  [FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
                                        "true", "Enable AVX-512 Population Count Instructions",
                                        [FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
                                  "Enable AVX-512 PreFetch Instructions",
                                  [FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
                                          "true",
                                          "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
                                  "Enable AVX-512 Doubleword and Quadword Instructions",
                                  [FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
                                  "Enable AVX-512 Byte and Word Instructions",
                                  [FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
                                  "Enable AVX-512 Vector Length eXtensions",
                                  [FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                                   "Enable AVX-512 Vector Byte Manipulation Instructions",
                                   [FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                                    "Enable AVX-512 further Vector Byte Manipulation Instructions",
                                    [FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                                   "Enable AVX-512 Integer Fused Multiply-Add",
                                   [FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
                                  "Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
                                   "Enable AVX-512 Vector Neural Network Instructions",
                                   [FeatureAVX512]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
                                     "Enable AVX-512 Bit Algorithms",
                                     [FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                                     "Enable packed carry-less multiplication instructions",
                                     [FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
                                   "Enable Galois Field Arithmetic Instructions",
                                   [FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
                                         "Enable vpclmulqdq instructions",
                                         [FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
                                   "Enable four-operand fused multiply-add",
                                   [FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
                                  "Enable XOP instructions",
                                  [FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
                                              "HasSSEUnalignedMem", "true",
                                              "Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
                                  "Enable AES instructions",
                                  [FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
                                   "Promote selected AES instructions to AVX512/AVX registers",
                                   [FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
                                  "Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
                                  "Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
                                    "Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
                                     "Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
                                       "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
                                    "Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
                                  "Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                   "Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
                                  "Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
                                  "Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
                                  "Enable SHA instructions",
                                  [FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
                                    "Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
                                     "Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
                                     "Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
                                       "Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
                                     "Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
                                     "Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
                                       "Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
                                      "Support ptwrite instruction">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
                                  "Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                       "Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
                                           "HasSlowDivide32", "true",
                                           "Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
                                           "HasSlowDivide64", "true",
                                           "Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                                "PadShortFunctions", "true",
                                                "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
                                      "Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
                                  "Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
                                         "Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
                                   "Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
                                       "Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
                                    "Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
                                      "Wait and pause enhancements">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
                                            "SlowTwoMemOps", "true",
                                            "Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
                                        "LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                      "LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
                                          "LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                         "INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
    : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                       "Use software floating point features.">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
                                              "HasPOPCNTFalseDeps", "true",
                                              "POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
                                             "HasLZCNTFalseDeps", "true",
                                             "LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
                                      "platform configuration instruction">;
// On recent X86 (port bound) processors, it's preferable to combine multiple
// fixed shuffles into a single shuffle using a variable mask.
def FeatureFastVariableShuffle
    : SubtargetFeature<"fast-variable-shuffle",
                       "HasFastVariableShuffle",
                       "true", "Shuffles with variable masks are fast">;
// On some X86 processors, there is no performance hazard to writing only the
// lower parts of a YMM or ZMM register without clearing the upper part.
def FeatureFastPartialYMMorZMMWrite
    : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
                       "HasFastPartialYMMorZMMWrite",
                       "true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def FeatureFastScalarFSQRT
    : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
                       "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT
    : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
                       "true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
    : SubtargetFeature<
          "fast-lzcnt", "HasFastLZCNT", "true",
          "LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
    : SubtargetFeature<
          "fast-11bytenop", "HasFast11ByteNOP", "true",
          "Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP
    : SubtargetFeature<
          "fast-15bytenop", "HasFast15ByteNOP", "true",
          "Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement a rotate, avoiding the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate
    : SubtargetFeature<
          "fast-shld-rotate", "HasFastSHLDRotate", "true",
          "SHLD can be used as a faster rotate">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB
    : SubtargetFeature<
          "ermsb", "HasERMSB", "true",
          "REP MOVS/STOS are fast">;

// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
    : SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
                       "Various instructions can be fused with conditional branches">;

// Gather is available since Haswell (AVX2 set). So technically, we can
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather
    : SubtargetFeature<"fast-gather", "HasFastGather", "true",
                       "Indicates if gather is reasonably fast.">;

def FeaturePrefer256Bit
    : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
                       "Prefer 256-bit AVX instructions">;

// Enable mitigation of some aspects of speculative execution related
// vulnerabilities by removing speculatable indirect branches. This disables
// jump-table formation, rewrites explicit `indirectbr` instructions into
// `switch` instructions, and uses a special construct called a "retpoline" to
// prevent speculation of the remaining indirect branches (indirect calls and
// tail calls).
def FeatureRetpoline
    : SubtargetFeature<"retpoline", "UseRetpoline", "true",
                       "Remove speculation of indirect branches from the "
                       "generated code, either by avoiding them entirely or "
                       "lowering them with a speculation blocking construct.">;

// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
    : SubtargetFeature<
          "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
          "Enable retpoline, but with an externally provided thunk.",
          [FeatureRetpoline]>;

// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                                      "Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
                                        "Support movdir64b instruction">;

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//

include "X86RegisterInfo.td"
include "X86RegisterBanks.td"

//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//

include "X86Schedule.td"
include "X86InstrInfo_reduce.td"

def X86InstrInfo : InstrInfo;

//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//

def ATTAsmParserVariant : AsmParserVariant {
  int Variant = 0;

  // Variant name.
  string Name = "att";

  // Discard comments in assembly strings.
  string CommentDelimiter = "#";

  // Recognize hard coded registers.
  string RegisterPrefix = "%";
}

def IntelAsmParserVariant : AsmParserVariant {
  int Variant = 1;

  // Variant name.
  string Name = "intel";

  // Discard comments in assembly strings.
  string CommentDelimiter = ";";

  // Recognize hard coded registers.
  string RegisterPrefix = "";
}
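// The practical difference between the two variants, for the same store
// (illustrative):
//
//   AT&T:  movl %eax, (%rbx)         # '%' prefix, source before destination
//   Intel: mov dword ptr [rbx], eax  ; no prefix, destination before source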

//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//

// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel} flag.
def ATTAsmWriter : AsmWriter {
  string AsmWriterClassName = "ATTInstPrinter";
  int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
  string AsmWriterClassName = "IntelInstPrinter";
  int Variant = 1;
}

def X86 : Target {
  // Information about the instructions...
  let InstructionSet = X86InstrInfo;
  let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
  let AllowRegisterRenaming = 1;
}