Add missing thirdparty files

Sajid
2024-09-08 17:16:32 +06:00
parent 458577aaee
commit 13ec7258e1
488 changed files with 1066961 additions and 1 deletion

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,7 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
// def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", [], IIC_FNCLEX>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;


@@ -0,0 +1,111 @@
//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the 3DNow! instruction set, which extends MMX to support
// floating point and also adds a few more random instructions for good measure.
//
//===----------------------------------------------------------------------===//
class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
: I<o, F, outs, ins, asm, pat>, Requires<[Has3DNow]> {
}
class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>, ThreeDNow {
let Constraints = "$src1 = $dst";
}
class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, ThreeDNow;
multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, bit Commutable = 0,
string Ver = ""> {
let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, string Ver = ""> {
def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>,
Sched<[sched]>;
def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
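// Illustrative note on how the defms below expand (names follow from the
// multiclass defs above): each defm produces a register-register and a
// register-memory record, e.g. "defm PAVGUSB" yields PAVGUSBrr
// ("pavgusb mm1, mm2") and PAVGUSBrm ("pavgusb mm1, [mem]"), both selected
// via the int_x86_3dnow_pavgusb intrinsic built by the !strconcat above.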
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>;
defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>;
defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>;
defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>;
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
let SchedRW = [WriteEMMS] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;
// When PREFETCHWT1 is supported we want to use it for everything but T0.
def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
}]>;
// Use PREFETCHWT1 for NTA, T2, T1.
def PrefetchWT1Level : ImmLeaf<i32, [{
return Imm < 3;
}]>;
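// Illustrative mapping of the two predicates above (locality is the third
// prefetch operand, 0 = NTA .. 3 = T0):
//   locality 3 (T0)          -> prefetchw (PrefetchWLevel always matches T0)
//   locality 0-2 (NTA/T2/T1) -> prefetchwt1 when available, else prefetchw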
let SchedRW = [WriteLoad] in {
let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
[(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
TB, Requires<[HasPrefetchW]>;
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
TB, Requires<[HasPREFETCHWT1]>;
}
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">;
defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">;
defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">;

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,116 @@
//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
// instructions.
//
//===----------------------------------------------------------------------===//
// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [Sched] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
TB, OpSize16;
def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
TB, OpSize32;
def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
(X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
SchedRW = [Sched.Folded, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize16;
def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize32;
def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
CondNode, EFLAGS))]>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass
// Conditional Moves.
defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>;
defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>;
defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>;
defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>;
defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>;
defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>;
defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>;
defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>;
defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>;
defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>;
defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>;
defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>;
defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>;
defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>;
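// Usage sketch (illustrative): after "cmp %esi, %edi", "cmovne %ecx, %eax"
// copies ECX into EAX only when ZF == 0; otherwise EAX keeps its old value,
// which is why $src1 is tied to $dst in the multiclass above.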
// SetCC instructions.
multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
let Uses = [EFLAGS] in {
def r : I<opc, MRMXr, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
TB, Sched<[WriteSETCC]>;
def m : I<opc, MRMXm, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
TB, Sched<[WriteSETCCStore]>;
} // Uses = [EFLAGS]
}
defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
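// Usage sketch (illustrative): after "cmp %esi, %edi", "sete %al" writes 1
// to AL when %edi == %esi and 0 otherwise; the "m" variants store the byte
// directly to memory instead.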
// SALC is an undocumented instruction. Information about it can be found
// here: http://www.rcollins.org/secrets/opcodes/SALC.html
// Sets AL to 0xFF if the carry flag is set, and to 0x00 otherwise.
let Uses = [EFLAGS], Defs = [AL], SchedRW = [WriteALU] in {
def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
}

File diff suppressed because it is too large.


@@ -0,0 +1,413 @@
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 jump, return, call, and related instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
// Return instructions.
//
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}", []>, OpSize16;
def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt", []>, OpSize16;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{l|f}", []>, OpSize32;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
"{l}ret{|f}q", []>, Requires<[In64BitMode]>;
def LRETW : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{w|f}", []>, OpSize16;
def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{l|f}\t$amt", []>, OpSize32;
def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{w|f}\t$amt", []>, OpSize16;
// The machine return-from-interrupt instructions. Sometimes we also need to
// perform a post-epilogue stack adjustment; codegen emits the pseudo form,
// which expands to include an SP adjustment if necessary.
def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", []>,
OpSize16;
def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>, OpSize32;
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>;
// let isCodeGenOnly = 1 in
// def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
// def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
}
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", [(br bb:$dst)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
"jmp\t$dst", []>, OpSize16;
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
"jmp\t$dst", []>, OpSize32;
}
}
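// Size sketch (illustrative): JMP_1 is the two-byte short form (EB rel8),
// while JMP_2/JMP_4 are the E9 rel16/rel32 forms, kept here only for the
// disassembler.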
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
[(X86brcond bb:$dst, Cond, EFLAGS)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
[]>, OpSize16, TB;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
[]>, TB, OpSize32;
}
}
}
defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the form with the address-size prefix is jcxz and the
// unprefixed form is jecxz.
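// Encoding sketch (illustrative): the opcode is 0xE3 in all three cases; in
// 32-bit mode "jecxz" is the plain 0xE3 form and "jcxz" is 0x67 0xE3, where
// 0x67 is the address-size prefix selecting CX.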
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jcxz\t$dst", []>, AdSize16, Requires<[Not64BitMode]>;
let Uses = [ECX] in
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jecxz\t$dst", []>, AdSize32;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jrcxz\t$dst", []>, AdSize64, Requires<[In64BitMode]>;
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst",
[(brind GR16:$dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>;
def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst",
[(brind (loadi16 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJumpLd]>;
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
[(brind GR32:$dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
[(brind (loadi32 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
[(brind GR64:$dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>,
Sched<[WriteJumpLd]>;
// Non-tracking jumps for IBT, use with caution.
let isCodeGenOnly = 1 in {
def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind GR16 : $dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>, NOTRACK;
def JMP16m_NT : I<0xFF, MRM4m, (outs), (ins i16mem : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind (loadi16 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP32r_NT : I<0xFF, MRM4r, (outs), (ins GR32 : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind GR32 : $dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>, NOTRACK;
def JMP32m_NT : I<0xFF, MRM4m, (outs), (ins i32mem : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind (loadi32 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP64r_NT : I<0xFF, MRM4r, (outs), (ins GR64 : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind GR64 : $dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>, NOTRACK;
def JMP64m_NT : I<0xFF, MRM4m, (outs), (ins i64mem : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind(loadi64 addr : $dst))]>,
Requires<[In64BitMode]>, Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"ljmp{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"ljmp{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;
let AsmVariantName = "att" in
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
// Loop instructions
let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
}
//===----------------------------------------------------------------------===//
// Call Instructions...
//
let isCall = 1 in
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let Uses = [ESP, SSP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
"call{l}\t$dst", []>, OpSize32,
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
let hasSideEffects = 0 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
(outs), (ins i16imm_pcrel:$dst),
"call{w}\t$dst", []>, OpSize16,
Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
"call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst),
"call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
def CALL16r_NT : I<0xFF, MRM2r, (outs), (ins GR16 : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall GR16 : $dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL16m_NT : I<0xFF, MRM2m, (outs), (ins i16mem : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall(loadi16 addr : $dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
def CALL32r_NT : I<0xFF, MRM2r, (outs), (ins GR32 : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall GR32 : $dst)]>,
OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL32m_NT : I<0xFF, MRM2m, (outs), (ins i32mem : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall(loadi32 addr : $dst))]>,
OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
/*
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, SSP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TCRETURNri : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs),
(ins i32mem_TC:$dst, i32imm:$offset), []>;
// FIXME: These should be pseudo instructions that are lowered when going to
// MCInst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst), "jmp\t$dst", []>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
"jmp{l}\t{*}$dst", []>;
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, EFLAGS, SSP] in {
def TCRETURNdicc : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/
//===----------------------------------------------------------------------===//
// Call Instructions...
//
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", []>, OpSize32,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseRetpoline]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseRetpoline]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
def CALL64r_NT : I<0xFF, MRM2r, (outs), (ins GR64 : $dst),
"call{q}\t{*}$dst",[(X86NoTrackCall GR64 : $dst)]>,
Requires<[In64BitMode]>, NOTRACK;
def CALL64m_NT : I<0xFF, MRM2m, (outs), (ins i64mem : $dst),
"call{q}\t{*}$dst",
[(X86NoTrackCall(loadi64 addr : $dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall]>, NOTRACK;
}
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{q}\t{*}$dst", []>;
}
/*
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
def TCRETURNri64 : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
"jmp\t$dst", []>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"jmp{q}\t{*}$dst", []>;
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
let hasREX_WPrefix = 1 in {
def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
}
}
let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
Requires<[Not64BitMode,UseRetpoline]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
Requires<[In64BitMode,UseRetpoline]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def RETPOLINE_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
def RETPOLINE_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [RSP, EFLAGS, SSP] in {
def TCRETURNdi64cc : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset,
i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/


@@ -0,0 +1,204 @@
//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
//
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
let Defs = [AX], Uses = [AL] in // AX = signext(AL)
def CBW : I<0x98, RawFrm, (outs), (ins),
"{cbtw|cbw}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX], Uses = [AX] in // EAX = signext(AX)
def CWDE : I<0x98, RawFrm, (outs), (ins),
"{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX)
def CWD : I<0x99, RawFrm, (outs), (ins),
"{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX)
def CDQ : I<0x99, RawFrm, (outs), (ins),
"{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
def CDQE : RI<0x98, RawFrm, (outs), (ins),
"{cltq|cdqe}", []>, Sched<[WriteALU]>;
let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
def CQO : RI<0x99, RawFrm, (outs), (ins),
"{cqto|cqo}", []>, Sched<[WriteALU]>;
}
// Sign/Zero extenders
let hasSideEffects = 0 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>,
OpSize32, TB, Sched<[WriteALULd]>;
let hasSideEffects = 0 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))]>,
TB, OpSize32, Sched<[WriteALULd]>;
// These instructions exist as a consequence of the operand-size prefix having
// control of the destination size, but not the input size. Only support them
// for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
let mayLoad = 1 in {
def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable;
def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable;
} // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32rr8_NOREX : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
// operand that can never access an h register. If support for h registers
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR8:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR16:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR32:$src))]>,
Sched<[WriteALU]>, Requires<[In64BitMode]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
}
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
def : Pat<(i64 (zext GR8:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(zextloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(i64 (zext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def : Pat<(zextloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
// SUBREG_TO_REG to utilize implicit zero-extension; however, this isn't possible
// when the 32-bit value is defined by a truncate or is copied from something
// where the high bits aren't necessarily all zero. In such cases, we fall back
// to these explicit zext instructions.
def : Pat<(i64 (zext GR32:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
def : Pat<(i64 (zextloadi64i32 addr:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
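// Concrete example (illustrative): for "i64 %r = zext i32 %x", the MOV32rr
// pattern above emits "movl %eax, %eax"-style code; on x86-64 any 32-bit
// register write already clears bits 63:32 of the full register, so no
// explicit extension instruction is needed.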


@@ -0,0 +1,636 @@
//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes FMA (Fused Multiply-Add) instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
// For all FMA opcodes declared in fma3p_rm_* and fma3s_rm_* multiclasses
// defined below, both the register and memory variants are commutable.
// For the register form the commutable operands are 1, 2 and 3.
// For the memory variant the folded operand must be operand 3. Thus,
// in that case, only operands 1 and 2 can be swapped.
// Commuting some operands may require an opcode change.
// FMA*213*:
// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
// operands 1 and 3 (register forms only): *213* --> *231*;
// operands 2 and 3 (register forms only): *213* --> *132*.
// FMA*132*:
// operands 1 and 2 (memory & register forms): *132* --> *231*;
// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
// operands 2 and 3 (register forms only): *132* --> *213*.
// FMA*231*:
// operands 1 and 2 (memory & register forms): *231* --> *132*;
// operands 1 and 3 (register forms only): *231* --> *213*;
// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
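// Illustrative semantics of the three forms (operand 1 is also $dst):
//   FMA*213* dst, src2, src3:  dst = src2 * dst  + src3
//   FMA*132* dst, src2, src3:  dst = dst  * src3 + src2
//   FMA*231* dst, src2, src3:  dst = src2 * src3 + dst
// e.g. swapping operands 2 and 3 of a *213* op yields the *132* computation,
// matching the table above.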
multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256,
X86SchedWriteWidths sched> {
defm NAME#213#Suff : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#231#Suff : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#132#Suff : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#213#Suff#Y : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#231#Suff#Y : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#132#Suff#Y : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
}
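// Naming sketch (illustrative): instantiated as "defm VFMADD" with Suff = "PS",
// this multiclass yields VFMADD132PSr/m, VFMADD213PSr/m and VFMADD231PSr/m for
// XMM, plus the VEX_L "Y" variants (e.g. VFMADD213PSYr/m) for YMM.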
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32,
SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsub, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmaddsub,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsubadd,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmadd, v4f32, v8f32, SchedWriteFMA>;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmsub, v4f32, v8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// All source register operands of FMA opcodes defined in the fma3s_rm
// multiclass can be commuted. In many cases such a commute transformation
// requires an opcode adjustment; for example, commuting operands 1 and 2 of
// the FMA*132 form
// would require an opcode change to FMA*231:
// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
// -->
// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
// Please see the more detailed comment at the very beginning of the section
// defining FMA3 opcodes above.
multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
X86MemOperand x86memop, X86FoldableSchedWrite sched> {
defm NAME#213#Suff : fma3s_rm_213<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#231#Suff : fma3s_rm_231<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#132#Suff : fma3s_rm_132<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpNode, sched>;
}
// These FMA*_Int instructions are defined specially for use when the scalar
// FMA intrinsics are lowered to machine instructions; in that sense, they are
// similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc. instructions.
//
// All of the FMA*_Int opcodes are defined as commutable here.
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
// and the corresponding optimizations have been developed.
// Commuting the 1st operand of FMA*_Int requires some additional analysis:
// the commute optimization is legal only if all users of FMA*_Int use only
// the lowest element of the result. Even though such analysis may not be
// implemented yet, we let the routines doing the actual commute transformation
// decide whether a given instruction is commutable.
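// Example of why commuting operand 1 needs analysis (illustrative):
// "vfmadd213ss xmm1, xmm2, xmm3" computes only the low element into xmm1 and
// keeps xmm1's upper elements; swapping xmm1 with xmm2 changes which register
// supplies those upper bits, so the commute is safe only when every user
// reads just the lowest element of the result.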
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
// operands of the FMA 213 form.
// The FMA 231 form can be obtained only by commuting the 1st operand of the
// 213 or 132 forms, which is possible only after a special analysis of all
// uses of the initial instruction. Such analysis does not exist yet, so the
// 231 form of the FMA*_Int instructions is introduced under an optimistic
// assumption that such analysis will be implemented eventually.
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
RegisterClass RC, Operand memop,
X86FoldableSchedWrite sched> {
defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
memop, RC, sched>;
defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, RC, sched>;
defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
memop, RC, sched>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
FR32, f32mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS",
VR128, ssmem, sched>;
let ExeDomain = SSEPackedDouble in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
FR64, f64mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
VR128, sdmem, sched>, VEX_W;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub,
SchedWriteFMA.Scl>, VEX_LIG;
multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
SDNode Move, ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA, NoAVX512] in {
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
RC:$src3))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, RC:$src3,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3), RC:$src2))))),
(!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, (mem_frag addr:$src3),
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
}
}
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
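// Unlike FMA3, FMA4 is non-destructive (illustrative example):
// "vfmaddss xmm0, xmm1, xmm2, xmm3" computes xmm0 = xmm1 * xmm2 + xmm3 and
// leaves all three sources intact, which is why no "$src1 = $dst" constraint
// appears in the multiclasses below.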
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : FMA4S<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched]>;
def rm : FMA4S<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_LIG, FoldGenData<NAME#rr>, Sched<[sched]>;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
ReadAfterLd]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_LIG, FoldGenData<NAME#rr_Int>, Sched<[sched]>;
} // isCodeGenOnly = 1
}
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,
X86SchedWriteWidths sched> {
let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
VEX_W, Sched<[sched.XMM]>;
def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
Sched<[sched.XMM.Folded, ReadAfterLd,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
ReadAfterLd]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
VEX_W, VEX_L, Sched<[sched.YMM]>;
def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
Sched<[sched.XMM]>, FoldGenData<NAME#rr>;
def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_L, Sched<[sched.YMM]>, FoldGenData<NAME#Yrr>;
} // isCodeGenOnly = 1
}
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
}
multiclass scalar_fma4_patterns<SDNode Op, string Name,
ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
(!cast<Instruction>(Name#"rr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2,
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Name#"rm_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)), addr:$src3)>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, (mem_frag addr:$src2),
RC:$src3))))),
(!cast<Instruction>(Name#"mr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)), addr:$src2,
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
}
}
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;

View File

@@ -0,0 +1,748 @@
//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
[SDNPHasChain, SDNPInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//
def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
def fpimmneg0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-0.0);
}]>;
def fpimm1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+1.0);
}]>;
def fpimmneg1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-1.0);
}]>;
/*
// Some 'special' instructions - expanded after instruction selection.
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
*/
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32",
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
// In some cases there are additional variants with a mixture of different
// register sizes.
// The fourth instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
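// A minimal sketch (with a hypothetical "ffoo" node/mnemonic, not defined in
// this backend) of the pseudo/FPI pairing described above: the pseudo carries
// the selection pattern, the FPI carries only the encoding, and the FP
// stackifier rewrites the former into the latter:
//   def FOO_Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
//                         [(set RFP32:$dst, (ffoo RFP32:$src))]>;
//   def FOO_F    : FPI<0xD9, MRM_E6, (outs), (ins), "ffoo">;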
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
// Register op register -> register
// These are separated out because they have no reversed form.
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
[(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
[(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
[(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
}
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2))),
(set RFP32:$dst,
(OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2))),
(set RFP64:$dst,
(OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
(set RFP64:$dst,
(OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
} // mayLoad = 1, hasSideEffects = 1
}
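// A worked sketch of the Forward bit: with the default Forward = 1 the memory
// operand is the right-hand source, ST(0) = ST(0) OP [mem]; with Forward = 0
// the pattern operands are swapped, which is how the "reverse" forms below are
// expressed. For example, SUBR's _Fp32m variant matches
//   (set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1))
// i.e. ST(0) = [mem] - ST(0).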
let Defs = [FPSW] in {
// FPBinary_rr just defines pseudo-instructions, so there is no need to set
// scheduling resources.
let hasNoSchedulingInfo = 1 in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
defm DIV : FPBinary_rr<fdiv>;
}
// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<fadd, MRM0m, "add">;
defm SUB : FPBinary<fsub, MRM4m, "sub">;
defm SUBR : FPBinary<fsub, MRM5m, "subr", 0>;
}
let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
defm DIV : FPBinary<fdiv, MRM6m, "div">;
defm DIVR : FPBinary<fdiv, MRM7m, "divr", 0>;
}
} // Defs = [FPSW]
class FPST0rInst<Format fp, string asm>
: FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
class FPrST0Inst<Format fp, string asm>
: FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
class FPrST0PInst<Format fp, string asm>
: FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
let SchedRW = [WriteFAdd] in {
def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t$op">;
def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t$op">;
def SUBR_FST0r : FPST0rInst <MRM5r, "fsubr\t$op">;
def SUB_FrST0 : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
} // SchedRW
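// A note on the asm-string syntax used above: "{x|y}" selects between the
// AT&T (x) and Intel (y) variants. For example,
// "fsub{|r}\t{%st(0), $op|$op, st(0)}" with $op = %st(1) prints as
// "fsub %st(0), %st(1)" in AT&T syntax but "fsubr st(1), st(0)" in Intel
// syntax, matching the mnemonic swap described in the NOTE above.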
let SchedRW = [WriteFCom] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
let SchedRW = [WriteFMul] in {
def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t$op">;
def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t$op">;
} // SchedRW
let SchedRW = [WriteFDiv] in {
def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t$op">;
def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
def DIV_FST0r : FPST0rInst <MRM6r, "fdiv\t$op">;
def DIVR_FrST0 : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
} // SchedRW
// Unary operations.
multiclass FPUnary<SDNode OpNode, Format fp, string asmstring> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src))]>;
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}
let SchedRW = [WriteFSqrt80] in
defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
let SchedRW = [WriteMicrocoded] in {
defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}
let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
} // hasSideEffects
def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Defs = [FPSW]
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
let SchedRW = [WriteFComLd] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW
let SchedRW = [WriteMicrocoded] in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\ttbyte ptr $src">;
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\ttbyte ptr $dst">;
} // SchedRW
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
CondMovFP,
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
cc, EFLAGS))]>;
def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
CondMovFP,
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
cc, EFLAGS))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
CondMovFP,
[(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
cc, EFLAGS))]>,
Requires<[HasCMov]>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFCMOV] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
defm CMOVP : FPCMov<X86_COND_P>;
defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
"fcmovb\t{$op, %st(0)|st(0), $op}">;
def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
"fcmovbe\t{$op, %st(0)|st(0), $op}">;
def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
"fcmove\t{$op, %st(0)|st(0), $op}">;
def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
"fcmovu\t{$op, %st(0)|st(0), $op}">;
def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
"fcmovnb\t{$op, %st(0)|st(0), $op}">;
def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
"fcmovnbe\t{$op, %st(0)|st(0), $op}">;
def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
"fcmovne\t{$op, %st(0)|st(0), $op}">;
def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
"fcmovnu\t{$op, %st(0)|st(0), $op}">;
} // Predicates = [HasCMov]
} // SchedRW
// Floating point loads & stores.
let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
[(set RFP80:$dst, (loadf80 addr:$src))]>;
} // canFoldAsLoad
def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i64))]>;
} // SchedRW
let SchedRW = [WriteStore] in {
def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
[(store RFP32:$src, addr:$op)]>;
def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
[(truncstoref32 RFP64:$src, addr:$op)]>;
def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
[(store RFP64:$src, addr:$op)]>;
def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
[(truncstoref32 RFP80:$src, addr:$op)]>;
def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
[(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.
let mayStore = 1, hasSideEffects = 0 in {
def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
[(store RFP80:$src, addr:$op)]>;
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
} // SchedRW
let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's a FPStack op.
let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
}
// FP Stack manipulation instructions.
let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
}
// Floating point constant loads.
let isReMaterializable = 1, SchedRW = [WriteZero] in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm1)]>;
def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm0)]>;
def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm1)]>;
def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm0)]>;
def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
let SchedRW = [WriteFLD0] in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
let SchedRW = [WriteFLD1] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
let SchedRW = [WriteFLDC], Defs = [FPSW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
def FLDLG2 : I<0xD9, MRM_EC, (outs), (ins), "fldlg2", []>;
def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
} // SchedRW
} // Defs = [FPSW]
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucom\t$reg">;
def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucomp\t$reg">;
def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) with ST(1), pop, pop
(outs), (ins), "fucompp">;
}
let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucomi\t$reg">;
def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucompi\t$reg">;
}
let Defs = [EFLAGS, FPSW] in {
def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
}
} // SchedRW
// Floating point flag ops.
let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
let Defs = [FPSW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)]>;
} // SchedRW
let Defs = [FPSW], mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
Sched<[WriteLoad]>;
// FPU control instructions
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
def FPNCEST0r : FPI<0xD9, MRM3r, (outs RST:$op), (ins),
"fstpnce\t{%st(0), $op|$op, st(0)}">;
def FENI8087_NOP : I<0xDB, MRM_E0, (outs), (ins), "feni8087_nop", []>;
def FDISI8087_NOP : I<0xDB, MRM_E1, (outs), (ins), "fdisi8087_nop", []>;
// Clear exceptions
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
} // Defs = [FPSW]
} // SchedRW
// Operand-less floating-point instructions for the disassembler.
def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", []>, Sched<[WriteNop]>;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", []>;
def F2XM1 : I<0xD9, MRM_F0, (outs), (ins), "f2xm1", []>;
def FYL2X : I<0xD9, MRM_F1, (outs), (ins), "fyl2x", []>;
def FPTAN : I<0xD9, MRM_F2, (outs), (ins), "fptan", []>;
def FPATAN : I<0xD9, MRM_F3, (outs), (ins), "fpatan", []>;
def FXTRACT : I<0xD9, MRM_F4, (outs), (ins), "fxtract", []>;
def FPREM1 : I<0xD9, MRM_F5, (outs), (ins), "fprem1", []>;
def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
def FPREM : I<0xD9, MRM_F8, (outs), (ins), "fprem", []>;
def FYL2XP1 : I<0xD9, MRM_F9, (outs), (ins), "fyl2xp1", []>;
def FSINCOS : I<0xD9, MRM_FB, (outs), (ins), "fsincos", []>;
def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", []>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", []>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
} // Defs = [FPSW]
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// Required for RET of f32 / f64 / f80 values.
def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
// Required for CALLs which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
RFP80:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
// Used to convert i64 to f64 since there isn't an SSE version.
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;

View File

@@ -0,0 +1,993 @@
//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//
// Format specifies the encoding used by the instruction. This is part of the
// ad-hoc solution used to emit machine instruction encodings by our machine
// code emitter.
class Format<bits<7> val> {
bits<7> Value = val;
}
def Pseudo : Format<0>;
def RawFrm : Format<1>;
def AddRegFrm : Format<2>;
def RawFrmMemOffs : Format<3>;
def RawFrmSrc : Format<4>;
def RawFrmDst : Format<5>;
def RawFrmDstSrc : Format<6>;
def RawFrmImm8 : Format<7>;
def RawFrmImm16 : Format<8>;
def MRMDestMem : Format<32>;
def MRMSrcMem : Format<33>;
def MRMSrcMem4VOp3 : Format<34>;
def MRMSrcMemOp4 : Format<35>;
def MRMXm : Format<39>;
def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
def MRM6m : Format<46>; def MRM7m : Format<47>;
def MRMDestReg : Format<48>;
def MRMSrcReg : Format<49>;
def MRMSrcReg4VOp3 : Format<50>;
def MRMSrcRegOp4 : Format<51>;
def MRMXr : Format<55>;
def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
def MRM6r : Format<62>; def MRM7r : Format<63>;
def MRM_C0 : Format<64>; def MRM_C1 : Format<65>; def MRM_C2 : Format<66>;
def MRM_C3 : Format<67>; def MRM_C4 : Format<68>; def MRM_C5 : Format<69>;
def MRM_C6 : Format<70>; def MRM_C7 : Format<71>; def MRM_C8 : Format<72>;
def MRM_C9 : Format<73>; def MRM_CA : Format<74>; def MRM_CB : Format<75>;
def MRM_CC : Format<76>; def MRM_CD : Format<77>; def MRM_CE : Format<78>;
def MRM_CF : Format<79>; def MRM_D0 : Format<80>; def MRM_D1 : Format<81>;
def MRM_D2 : Format<82>; def MRM_D3 : Format<83>; def MRM_D4 : Format<84>;
def MRM_D5 : Format<85>; def MRM_D6 : Format<86>; def MRM_D7 : Format<87>;
def MRM_D8 : Format<88>; def MRM_D9 : Format<89>; def MRM_DA : Format<90>;
def MRM_DB : Format<91>; def MRM_DC : Format<92>; def MRM_DD : Format<93>;
def MRM_DE : Format<94>; def MRM_DF : Format<95>; def MRM_E0 : Format<96>;
def MRM_E1 : Format<97>; def MRM_E2 : Format<98>; def MRM_E3 : Format<99>;
def MRM_E4 : Format<100>; def MRM_E5 : Format<101>; def MRM_E6 : Format<102>;
def MRM_E7 : Format<103>; def MRM_E8 : Format<104>; def MRM_E9 : Format<105>;
def MRM_EA : Format<106>; def MRM_EB : Format<107>; def MRM_EC : Format<108>;
def MRM_ED : Format<109>; def MRM_EE : Format<110>; def MRM_EF : Format<111>;
def MRM_F0 : Format<112>; def MRM_F1 : Format<113>; def MRM_F2 : Format<114>;
def MRM_F3 : Format<115>; def MRM_F4 : Format<116>; def MRM_F5 : Format<117>;
def MRM_F6 : Format<118>; def MRM_F7 : Format<119>; def MRM_F8 : Format<120>;
def MRM_F9 : Format<121>; def MRM_FA : Format<122>; def MRM_FB : Format<123>;
def MRM_FC : Format<124>; def MRM_FD : Format<125>; def MRM_FE : Format<126>;
def MRM_FF : Format<127>;
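// The MRM_C0..MRM_FF formats denote instructions whose ModRM byte is the
// fixed value named by the hex suffix; e.g. FNSTSW16r (opcode 0xDF, MRM_E0)
// encodes as DF E0.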
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
class ImmType<bits<4> val> {
bits<4> Value = val;
}
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
def Imm8Reg : ImmType<3>; // Register encoded in [7:4].
def Imm16 : ImmType<4>;
def Imm16PCRel : ImmType<5>;
def Imm32 : ImmType<6>;
def Imm32PCRel : ImmType<7>;
def Imm32S : ImmType<8>;
def Imm64 : ImmType<9>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
class FPFormat<bits<3> val> {
bits<3> Value = val;
}
def NotFP : FPFormat<0>;
def ZeroArgFP : FPFormat<1>;
def OneArgFP : FPFormat<2>;
def OneArgFPRW : FPFormat<3>;
def TwoArgFP : FPFormat<4>;
def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
// Keep in sync with tables in X86InstrInfo.cpp.
class Domain<bits<2> val> {
bits<2> Value = val;
}
def GenericDomain : Domain<0>;
def SSEPackedSingle : Domain<1>;
def SSEPackedDouble : Domain<2>;
def SSEPackedInt : Domain<3>;
// Class specifying the vector form used to decompress the 8-bit
// displacement.
class CD8VForm<bits<3> val> {
bits<3> Value = val;
}
def CD8VF : CD8VForm<0>; // v := VL
def CD8VH : CD8VForm<1>; // v := VL/2
def CD8VQ : CD8VForm<2>; // v := VL/4
def CD8VO : CD8VForm<3>; // v := VL/8
// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
def CD8VT8 : CD8VForm<7>; // v := 8
// Class specifying the prefix used as an opcode extension.
class Prefix<bits<3> val> {
bits<3> Value = val;
}
def NoPrfx : Prefix<0>;
def PD : Prefix<1>;
def XS : Prefix<2>;
def XD : Prefix<3>;
def PS : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
// that other instructions with this opcode use PD/XS/XD
// and if any of those is not supported they shouldn't
// decode to this instruction. e.g. ANDSS/ANDSD don't
// exist, but the 0xf2/0xf3 encoding shouldn't
// decode to ANDPS.
// Class specifying the opcode map.
class Map<bits<3> val> {
bits<3> Value = val;
}
def OB : Map<0>;
def TB : Map<1>;
def T8 : Map<2>;
def TA : Map<3>;
def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
// Class specifying the encoding
class Encoding<bits<2> val> {
bits<2> Value = val;
}
def EncNormal : Encoding<0>;
def EncVEX : Encoding<1>;
def EncXOP : Encoding<2>;
def EncEVEX : Encoding<3>;
// Operand size for encodings that change based on mode.
class OperandSize<bits<2> val> {
bits<2> Value = val;
}
def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
// Address size for encodings that change based on mode.
class AddressSize<bits<2> val> {
bits<2> Value = val;
}
def AdSizeX : AddressSize<0>; // Address size determined using addr operand.
def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize16 { OperandSize OpSize = OpSize16; }
class OpSize32 { OperandSize OpSize = OpSize32; }
class AdSize16 { AddressSize AdSize = AdSize16; }
class AdSize32 { AddressSize AdSize = AdSize32; }
class AdSize64 { AddressSize AdSize = AdSize64; }
class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
class T8 { Map OpMap = T8; }
class TA { Map OpMap = TA; }
class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
class XD : TB { Prefix OpPrefix = XD; }
class XS : TB { Prefix OpPrefix = XS; }
class T8PS : T8 { Prefix OpPrefix = PS; }
class T8PD : T8 { Prefix OpPrefix = PD; }
class T8XD : T8 { Prefix OpPrefix = XD; }
class T8XS : T8 { Prefix OpPrefix = XS; }
class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bits<2> VEX_WPrefix = 1; }
class VEX_WIG { bits<2> VEX_WPrefix = 2; }
// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
// FIXME: We should consider adding separate bits for VEX_WIG and the extra
// part of W1X. This would probably simplify the tablegen emitters and
// the TSFlags creation below.
class VEX_W1X { bits<2> VEX_WPrefix = 3; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
class NOTRACK { bit hasNoTrackPrefix = 1; }
// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
class EVEX_CD8<int esize, CD8VForm form> {
int CD8_EltSize = !srl(esize, 3);
bits<3> CD8_Form = form.Value;
}
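// A worked example: EVEX_CD8<32, CD8VF> yields CD8_EltSize = !srl(32, 3) = 4
// bytes with the full-vector form, feeding the CD8_Scale computation in
// X86Inst below.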
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
// Specify the alternative register form instruction to replace the current
// instruction in case it was picked during generation of memory folding tables.
class FoldGenData<string _RegisterForm> {
string FoldGenRegForm = _RegisterForm;
}
// Provide a specific instruction to be used by the EVEX2VEX conversion.
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
// Mark the instruction as "illegal to memory fold/unfold"
class NotMemoryFoldable { bit isMemoryFoldable = 0; }
// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
bits<8> Opcode = opcod;
Format Form = f;
bits<7> FormBits = Form.Value;
ImmType ImmT = i;
dag OutOperandList = outs;
dag InOperandList = ins;
string AsmString = AsmStr;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
//
// Attributes specific to X86 instructions...
//
bit ForceDisassemble = 0; // Force instruction to disassemble even though it's
// isCodeGenOnly. Needed to hide an ambiguous
// AsmString from the parser, but still disassemble.
OperandSize OpSize = OpSizeFixed; // Does this instruction's encoding change
// based on operand size of the mode?
bits<2> OpSizeBits = OpSize.Value;
AddressSize AdSize = AdSizeX; // Does this instruction's encoding change
// based on address size of the mode?
bits<2> AdSizeBits = AdSize.Value;
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
bits<3> OpMapBits = OpMap.Value;
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
Domain ExeDomain = d;
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit?
bit hasEVEX_K = 0; // Does this inst require masking?
bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field?
bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field?
bit hasEVEX_B = 0; // Does this inst set the EVEX_B field?
bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width.
// Declare it int rather than bits<4> so that all bits are defined when
// assigning to bits<7>.
int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes.
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
bit hasNoTrackPrefix = 0; // Does this inst have a 0x3E (NoTrack) prefix?
bits<2> EVEX_LL;
let EVEX_LL{0} = hasVEX_L;
let EVEX_LL{1} = hasEVEX_L2;
// Vector size in bytes.
bits<7> VectSize = !shl(16, EVEX_LL);
// The scaling factor for AVX512's compressed displacement is either
// - the size of a power-of-two number of elements or
// - the size of a single element for broadcasts or
// - the total vector size divided by a power-of-two number.
// Possible values are: 0 (non-AVX512 inst), 1, 2, 4, 8, 16, 32 and 64.
bits<7> CD8_Scale = !if (!eq (OpEnc.Value, EncEVEX.Value),
!if (CD8_Form{2},
!shl(CD8_EltSize, CD8_Form{1-0}),
!if (hasEVEX_B,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
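// Worked examples of the above: a 512-bit instruction (VectSize = 64) tagged
// EVEX_CD8<32, CD8VF> gets CD8_Scale = !srl(64, 0) = 64; its EVEX_B broadcast
// form is instead scaled by the element size, 4; a two-element tuple form
// (CD8VT2) would give !shl(4, 1) = 8.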
// Used in the memory folding generation (TableGen backend) to point to an alternative
// instruction to replace the current one in case it got picked during generation.
string FoldGenRegForm = ?;
// Used to prevent an explicit EVEX2VEX override for this instruction.
string EVEX2VEXOverride = ?;
bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
let TSFlags{10-9} = AdSizeBits;
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
let TSFlags{15-13} = OpMapBits;
let TSFlags{16} = hasREX_WPrefix;
let TSFlags{20-17} = ImmT.Value;
let TSFlags{23-21} = FPForm.Value;
let TSFlags{24} = hasLockPrefix;
let TSFlags{25} = hasREPPrefix;
let TSFlags{27-26} = ExeDomain.Value;
let TSFlags{29-28} = OpEncBits;
let TSFlags{37-30} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
let TSFlags{38} = VEX_WPrefix{0};
let TSFlags{39} = hasVEX_4V;
let TSFlags{40} = hasVEX_L;
let TSFlags{41} = hasEVEX_K;
let TSFlags{42} = hasEVEX_Z;
let TSFlags{43} = hasEVEX_L2;
let TSFlags{44} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
let TSFlags{51-45} = CD8_Scale;
let TSFlags{52} = hasEVEX_RC;
let TSFlags{53} = hasNoTrackPrefix;
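// A worked packing example: an instruction with Form = MRMSrcReg (49) and
// OpMap = TB (1) stores 0b0110001 in TSFlags{6-0} and 0b001 in TSFlags{15-13}.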
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32S, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm64, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: PseudoI<outs, ins, pattern> {
let FPForm = fp;
}
// Templates for instructions that use a 16- or 32-bit segmented address as
// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
//
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
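// An illustrative (hypothetical "foo") instantiation: adding VEX to an
// XS-prefixed SI instruction flips its predicate from [UseSSE1] to [UseAVX]
// and prepends "v" to the mnemonic:
//   def FOOrr  : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
//                   "foo\t{$src, $dst|$dst, $src}", []>, XS;      // [UseSSE1]
//   def VFOOrr : SI<0x58, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
//                   "foo\t{$src, $dst|$dst, $src}", []>, XS, VEX; // [UseAVX], "vfoo"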
// SI_Int - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
[UseSSE2])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// PI - SSE 1 & 2 packed instructions
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// MMXPI - SSE 1 & 2 packed instructions with MMX operands
class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
[HasMMX, HasSSE1]);
}
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SSE1 Instruction Templates:
//
// SSI   - SSE1 instructions with XS prefix.
// SSIi8 - SSE1 instructions with ImmT == Imm8 and XS prefix.
// PSI   - SSE1 instructions with PS prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
// VSSI  - SSE1 instructions with XS prefix in AVX form.
// VPSI  - SSE1 instructions with PS prefix in AVX form, packed single.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
Requires<[HasAVX]>;
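// Illustrative sketch (not part of this file): a packed-single SSE1 op
// written against PSI; opcode 0x58 is addps, and PSI supplies the PS
// prefix, the SSEPackedSingle domain, and the [UseSSE1] predicate.
// def ADDPSrr_sketch : PSI<0x58, MRMSrcReg, (outs VR128:$dst),
//                          (ins VR128:$src1, VR128:$src2),
//                          "addps\t{$src2, $dst|$dst, $src2}", []>;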
// SSE2 Instruction Templates:
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
// S2SI - SSE2 instructions with XS prefix.
// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with PD prefix, packed double domain.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
// VS2SI - SSE2 scalar instructions with XS prefix in AVX form.
// VPDI - SSE2 vector instructions with PD prefix in AVX form,
// packed double domain.
// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
// S2I - SSE2 scalar instructions with PD prefix.
// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
// MMX operands.
// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
PD, Requires<[HasAVX]>;
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
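// Illustrative sketch (not part of this file): the scalar-double analogue
// of the SSE1 example above, written against SDI; opcode 0x58 is addsd,
// and SDI supplies the XD prefix and the [UseSSE2] predicate.
// def ADDSDrr_sketch : SDI<0x58, MRMSrcReg, (outs FR64:$dst),
//                          (ins FR64:$src1, FR64:$src2),
//                          "addsd\t{$src2, $dst|$dst, $src2}", []>;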
// SSE3 Instruction Templates:
//
// S3I - SSE3 instructions with PD prefix.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit
// versions use the MMX registers and are grouped with the MMX classes; they
// need to remain enabled even when AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
Requires<[HasMMX, HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
Requires<[HasMMX, HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
// SS48I - SSE 4.1 instructions with T8 prefix.
// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used instead of 'UseSSE42' because CRC32 (the only
// user of SS42FI) has no AVX form and must stay available when AVX is enabled.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
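// Illustrative sketch (not part of this file): CRC32 is the sole user of
// SS42FI; its 32-bit reg/reg form looks roughly like this (the real
// definition in X86InstrSSE.td also ties $src1 to $dst and has a pattern).
// def CRC32r32r32_sketch : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
//                                 (ins GR32:$src1, GR32:$src2),
//                                 "crc32{l}\t{$src2, $src1|$src1, $src2}", []>;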
// SS42AI - SSE 4.2 instructions with TA prefix.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
// AVX8I - AVX instructions with T8PD prefix.
// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX]>;
// AVX2 Instruction Templates:
// Instructions introduced in AVX2 (no SSE equivalent forms)
//
// AVX28I - AVX2 instructions with T8PD prefix.
// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX2]>;
// AVX-512 Instruction Templates:
// Instructions introduced in AVX-512 (no SSE equivalent forms)
//
// AVX5128I - AVX-512 instructions with T8PD prefix.
// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
// AVX512PDI - AVX-512 instructions with PD, double packed.
// AVX512PSI - AVX-512 instructions with PS, single packed.
// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
// AVX512XSI - AVX-512 instructions with XS prefix, generic domain.
// AVX512BI - AVX-512 instructions with PD, int packed domain.
// AVX512SI - AVX-512 scalar instructions with PD prefix.
class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX512]>;
class AVX5128IBase : T8PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
Requires<[HasAVX512]>;
class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS,
Requires<[HasAVX512]>;
class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
Requires<[HasAVX512]>;
class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIBase : PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XSIi8Base : XS {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XDIi8Base : XD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
}
class AVX512PDIi8Base : PD {
Domain ExeDomain = SSEPackedDouble;
ImmType ImmT = Imm8;
}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[HasAVX512]>;
class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
class AVX512FMA3Base : T8PD, EVEX_4V;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;
// AES Instruction Templates:
//
// AES8I, AESAI
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[NoAVX, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[NoAVX, HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP9, Requires<[HasXOP]>;
// XOP 2 and 3 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 4 Operand Instruction Templates with imm byte
class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
VEX_4V, Requires<[HasXOP]>;
// X86-64 Instruction templates...
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, REX_W;
class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii16<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii64<o, F, outs, ins, asm, pattern>, REX_W;
class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: S2I<o, F, outs, ins, asm, pattern>, REX_W;
class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: VS2I<o, F, outs, ins, asm, pattern>, VEX_W;
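// Illustrative sketch (not part of this file): the R-prefixed templates
// simply add REX.W, so a 64-bit sign-extended-immediate move can be written
// as below (modeled on LLVM's MOV64ri32; the operand type here is an
// assumption, and the selection pattern is omitted).
// def MOV64ri32_sketch : RIi32S<0xC7, MRM0r, (outs GR64:$dst),
//                               (ins i64i32imm:$src),
//                               "mov{q}\t{$src, $dst|$dst, $src}", []>;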
// MMX Instruction templates
//
// MMXI   - MMX instructions with PS prefix.
// MMXI32 - MMX instructions with PS prefix, only valid outside 64-bit mode.
// MMXI64 - MMX instructions with PS prefix, only valid in 64-bit mode.
// MMXRI  - MMX instructions with PS prefix and REX.W.
// MMX2I  - MMX / SSE2 instructions with PD prefix.
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID  - MMX instructions with XD prefix.
// MMXIS  - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;


@@ -0,0 +1,612 @@
//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
// All instructions that use MMX should be in this file, even if they also use
// SSE.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// MMX Multiclasses
//===----------------------------------------------------------------------===//
// Alias instruction that maps zero vector to pxor mmx.
// This is expanded by ExpandPostRAPseudos to a pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
X86FoldableSchedWrite sched, bit Commutable = 0,
X86MemOperand OType = i64mem> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
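// Illustrative note (not part of this file): a defm such as
//   defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
//                                      SchedWriteVecALU.MMX, 1>;
// (see the arithmetic section below) expands to MMX_PADDBirr and
// MMX_PADDBirm, the memory form folding its operand through load_mmx.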
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2, X86FoldableSchedWrite sched,
X86FoldableSchedWrite schedImm> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
Sched<[schedImm]>;
}
}
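// Illustrative note (not part of this file): MMXI_binop_rmi_int produces
// three defs per defm -- e.g. MMX_PSRLW below yields MMX_PSRLWrr and
// MMX_PSRLWrm (opcode 0xD1, shift count in a register or memory) plus
// MMX_PSRLWri (opcode 0x71 /2, immediate shift count).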
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched> {
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched,
bit Commutable = 0> {
let isCommutable = Commutable in
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
}
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
X86FoldableSchedWrite sched> {
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
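// Illustrative note (not part of this file): this multiclass is
// instantiated once below as
//   defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b, ...>;
// which yields MMX_PALIGNRrri and MMX_PALIGNRrmi, each taking the byte-shift
// amount as an 8-bit immediate ($src3).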
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (Int SrcRC:$src))], d>,
Sched<[sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
Sched<[sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
(ins DstRC:$src1, SrcRC:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
Sched<[WriteCvtI2PS]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
Sched<[WriteCvtI2PS.Folded]>;
}
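// Illustrative note (not part of this file): the 3-address form above is
// used for cvtpi2ps (see MMX_CVTPI2PS below), which only writes the low
// 64 bits of its XMM destination -- hence the "$src1 = $dst" constraint
// wrapped around its defm.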
//===----------------------------------------------------------------------===//
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
let SchedRW = [WriteEMMS] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
//===----------------------------------------------------------------------===//
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))]>,
Sched<[WriteVecMoveFromGpr]>;
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))]>,
Sched<[WriteVecLoad]>;
let Predicates = [HasMMX] in {
def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (MMX_X86movw2d (i32 0))),
(MMX_SET0)>;
def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
(MMX_MOVD64rm addr:$src)>;
}
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteVecStore]>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
(MMX_X86movd2w (x86mmx VR64:$src)))]>,
Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))]>,
Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}",
[]>, Sched<[SchedWriteVecMoveLS.MMX.RM]>;
let isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert VR64:$src))]>,
Sched<[WriteVecMoveToGpr]>;
let SchedRW = [WriteVecMove], hasSideEffects = 0, isMoveReg = 1 in {
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MMX_MOVQ64rr">;
} // SchedRW, hasSideEffects, isMoveReg
} // isBitcast
// def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
// (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
(outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.MMX.MR]>;
let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
} // SchedRW
let SchedRW = [SchedWriteVecMoveLS.MMX.MR] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64
(scalar_to_vector
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[]>;
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[]>;
}
} // SchedRW
let Predicates = [HasMMX, HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
// movd to MMX register zero-extends
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(MMX_MOVD64rm addr:$src)>;
}
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
SchedWriteVecALU.MMX>;
defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
SchedWriteVecALU.MMX>;
defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
SchedWriteVecALU.MMX>;
// -- Addition
defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
SchedWriteVecALU.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
SchedWritePHAdd.MMX>;
defm MMX_PHADDD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
SchedWritePHAdd.MMX>;
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
SchedWritePHAdd.MMX>;
// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
SchedWriteVecALU.MMX>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
SchedWriteVecALU.MMX>;
defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
SchedWritePHAdd.MMX>;
// -- Multiplication
defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
SchedWriteVecIMul.MMX, 1>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw,
SchedWriteVecIMul.MMX>;
let Predicates = [HasMMX, HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
SchedWritePSADBW.MMX, 1>;
}
defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
SchedWriteVecALU.MMX>;
defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
SchedWriteVecALU.MMX>;
defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
SchedWriteVecALU.MMX>;
let Constraints = "$src1 = $dst" in
defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b,
SchedWriteShuffle.MMX>;
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
SchedWriteVecLogic.MMX, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
SchedWriteVecLogic.MMX>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
SchedWriteVecALU.MMX>;
// -- Unpack Instructions
defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
int_x86_mmx_punpckhbw,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
int_x86_mmx_punpckhwd,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
int_x86_mmx_punpckhdq,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
int_x86_mmx_punpckldq,
SchedWriteShuffle.MMX,
0, i32mem>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
SchedWriteShuffle.MMX>;
defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
SchedWriteShuffle.MMX>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
SchedWriteShuffle.MMX>;
// -- Shuffle Instructions
defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
SchedWriteVarShuffle.MMX>;
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
WriteCvtI2PD, SSEPackedDouble>, PD;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
}
// Extract / Insert
let Predicates = [HasMMX, HasSSE1] in
def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
imm:$src2))]>,
Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasMMX, HasSSE1] in {
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteVecInsert]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
(ins VR64:$src1, i16mem:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}
// Mask creation
let Predicates = [HasMMX, HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>,
Sched<[WriteMMXMOVMSK]>;
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
(x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
let Uses = [RDI], Predicates = [HasMMX, HasSSE1,In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
}
// 64-bit bit convert.
let Predicates = [HasMMX, HasSSE2] in {
def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
(MMX_CVTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(MMX_CVTPD2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
(MMX_CVTTPD2PIirr VR128:$src)>;
}


@@ -0,0 +1,80 @@
//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MPX instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// FIXME: Investigate a better scheduler class once MPX is used inside LLVM.
let SchedRW = [WriteSystem] in {
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
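// Illustrative note (not part of this file): the defm above expands to
// BNDMK32rm (outside 64-bit mode) and BNDMK64rm (64-bit mode); bndmk
// derives the bounds from the effective address of its memory operand.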
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
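// Illustrative note (not part of this file): each bound-check defm expands
// to four defs per mnemonic -- e.g. BNDCL32rm/BNDCL64rm and
// BNDCL32rr/BNDCL64rr -- checking an address held in a register or computed
// from a memory operand against bounds register $src1.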
def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayLoad = 1 in {
def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
}
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayStore = 1 in {
def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
"bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
}
let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
"bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
} // SchedRW


@@ -0,0 +1,30 @@
//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel SGX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SGX instructions
let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
"encls", []>, TB;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
"enclu", []>, TB;
// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
"enclv", []>, TB;
} // SchedRW


@@ -0,0 +1,63 @@
//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the AMD SVM instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SVM instructions
let SchedRW = [WriteSystem] in {
// 0F 01 D9
def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
// 0F 01 DC
def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
// 0F 01 DD
def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
// 0F 01 DE
let Uses = [EAX] in
def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB;
// 0F 01 D8
let Uses = [EAX] in
def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DA
let Uses = [EAX] in
def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DB
let Uses = [EAX] in
def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DF
let Uses = [EAX, ECX] in
def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%eax, %ecx|eax, ecx}", []>, TB, Requires<[Not64BitMode]>;
let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%rax, %ecx|rax, ecx}", []>, TB, Requires<[In64BitMode]>;
} // SchedRW


@@ -0,0 +1,755 @@
//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
// privileged modes. These are not typically used by the compiler, but are
// supported for the assembler and disassembler.
//
//===----------------------------------------------------------------------===//
let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
// CPU flow control instructions
let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
def UD1Wm : I<0xB9, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2),
"ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
def UD1Lm : I<0xB9, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2),
"ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
def UD1Qm : RI<0xB9, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
"ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
def UD1Wr : I<0xB9, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2),
"ud1{w} {$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
def UD1Lr : I<0xB9, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
"ud1{l} {$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
def UD1Qr : RI<0xB9, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
"ud1{q} {$src2, $src1|$src1, $src2}", []>, TB;
}
def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
// Interrupt and SysCall Instructions.
let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>, Requires<[Not64BitMode]>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
} // SchedRW
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
// def : InstAlias<"int\t$3", (INT3)>;
let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
def : Pat<(debugtrap),
(INT3)>, Requires<[NotPS4]>;
def : Pat<(debugtrap),
(INT (i8 0x41))>, Requires<[IsPS4]>;
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|al, dx}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins), "in{w}\t{%dx, %ax|ax, dx}", []>,
OpSize16;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", []>,
OpSize32;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
"in{b}\t{$port, %al|al, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{w}\t{$port, %ax|ax, $port}", []>, OpSize16;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{l}\t{$port, %eax|eax, $port}", []>, OpSize32;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins), "out{b}\t{%al, %dx|dx, al}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins), "out{w}\t{%ax, %dx|dx, ax}", []>,
OpSize16;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", []>,
OpSize32;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
"out{b}\t{%al, $port|$port, al}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{w}\t{%ax, $port|$port, ax}", []>, OpSize16;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{l}\t{%eax, $port|$port, eax}", []>, OpSize32;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
//let SchedRW = [WriteNop] in {
//def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
//def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
//def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
//def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
//def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
//def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
//} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from segment registers.
//
let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo.
let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
// i16mem operand in LAR64rm and GR32 operand in LAR64rr is not a typo.
let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
// i16mem operand in LSL32rm and GR32 operand in LSL32rr is not a typo.
let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
"str{w}\t$dst", []>, TB, OpSize16;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
"str{l}\t$dst", []>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
"str{q}\t$dst", []>, TB;
let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", []>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), "push{l}\t{%cs|cs}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), "push{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), "push{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), "push{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), "push{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins), "push{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins), "push{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", []>,
OpSize16, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), "push{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", []>,
OpSize16, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), "push{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), "push{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", []>,
OpSize16, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins), "pop{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins), "pop{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", []>,
OpSize16, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins), "pop{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins), "pop{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lds{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lss{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"les{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"les{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lfs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lgs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lgs\t{$src, $dst|$dst, $src}", []>, TB;
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in {
def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
"sgdt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SGDT32m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
"sgdt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>;
def SGDT64m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
"sgdt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SIDT16m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
"sidt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SIDT32m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
"sidt{l}\t$dst", []>, OpSize32, TB, Requires <[Not64BitMode]>;
def SIDT64m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
"sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
"sldt{w}\t$dst", []>, TB, OpSize16;
let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{w}\t$dst", []>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
"sldt{l}\t$dst", []>, OpSize32, TB;
// LLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
"sldt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LGDT32m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LGDT64m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LIDT64m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
} // SchedRW
//===----------------------------------------------------------------------===//
// Specialized register support
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
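// rdmsr/wrmsr move the 64-bit MSR value through EDX:EAX (high:low half),
// with ECX selecting the MSR; the Uses/Defs lists above encode exactly that.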
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", []>, OpSize16, TB;
def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
"smsw{l}\t$dst", []>, OpSize32, TB;
// no m form encodable; use SMSW16m
def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
"smsw{q}\t$dst", []>, TB;
// For memory operands, there is only a 16-bit form
def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
"smsw{w}\t$dst", []>, TB;
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB;
// wbnoinvd is like wbinvd, except without invalidation
// encoding: like wbinvd + an 0xF3 prefix
def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd",
[(int_x86_wbnoinvd)]>, XS,
Requires<[HasWBNOINVD]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// CET instructions
// Use with caution; availability is not predicated on features.
let SchedRW = [WriteSystem] in {
let Uses = [SSP] in {
let Defs = [SSP] in {
def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
[(int_x86_incsspd GR32:$src)]>, XS;
def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src",
[(int_x86_incsspq GR64:$src)]>, XS;
} // Defs SSP
let Constraints = "$src = $dst" in {
def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"rdsspd\t$dst",
[(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS;
def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src),
"rdsspq\t$dst",
[(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS;
}
let Defs = [SSP] in {
def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp",
[(int_x86_saveprevssp)]>, XS;
def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src),
"rstorssp\t$src",
[(int_x86_rstorssp addr:$src)]>, XS;
} // Defs SSP
} // Uses SSP
def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrssd\t{$src, $dst|$dst, $src}",
[(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS;
def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrssq\t{$src, $dst|$dst, $src}",
[(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS;
def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrussd\t{$src, $dst|$dst, $src}",
[(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD;
def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrussq\t{$src, $dst|$dst, $src}",
[(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD;
let Defs = [SSP] in {
let Uses = [SSP] in {
def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy",
[(int_x86_setssbsy)]>, XS;
} // Uses SSP
def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src),
"clrssbsy\t$src",
[(int_x86_clrssbsy addr:$src)]>, XS;
} // Defs SSP
} // SchedRW
let SchedRW = [WriteSystem] in {
def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS;
def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS;
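// endbr64/endbr32 (f3 0f 1e fa / f3 0f 1e fb) sit in multi-byte NOP space and
// execute as NOPs on processors without CET-IBT, which is presumably why no
// feature predicate is attached to them here.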
} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins),
"xsetbv",
[(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
} // HasXSAVE
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave\t$dst",
[(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave64\t$dst",
[(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor\t$dst",
[(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor64\t$dst",
[(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt\t$dst",
[(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt64\t$dst",
[(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec\t$dst",
[(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec64\t$dst",
[(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves\t$dst",
[(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves64\t$dst",
[(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors\t$dst",
[(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors64\t$dst",
[(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
// def : InstAlias<"xstorerng", (XSTORE)>;
let SchedRW = [WriteSystem] in {
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB;
def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB;
def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB;
def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB;
def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB;
}
let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB;
def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB;
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
} // SchedRW
/*
//===----------------------------------------------------------------------===//
// PKU - enable protection key
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def WRPKRU : PseudoI<(outs), (ins GR32:$src),
[(int_x86_wrpkru GR32:$src)]>;
def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
[(set GR32:$dst, (int_x86_rdpkru))]>;
}
*/
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
let Uses = [EAX, ECX, EDX] in
def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
"rdfsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
"rdfsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
"rdgsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
"rdgsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS;
def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
"wrfsbase{l}\t$src",
[(int_x86_wrfsbase_32 GR32:$src)]>, XS;
def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
"wrfsbase{q}\t$src",
[(int_x86_wrfsbase_64 GR64:$src)]>, XS;
def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
"wrgsbase{l}\t$src",
[(int_x86_wrgsbase_32 GR32:$src)]>, XS;
def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
"wrgsbase{q}\t$src",
[(int_x86_wrgsbase_64 GR64:$src)]>, XS;
}
//===----------------------------------------------------------------------===//
// INVPCID Instruction
let SchedRW = [WriteSystem] in {
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}",
[(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD,
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW
let Predicates = [In64BitMode, HasINVPCID] in {
// In 64-bit mode the instruction can only take a 64-bit register as its
// register argument, while the intrinsic only accepts the corresponding
// 32-bit argument.
// The accepted values for now are 0,1,2,3 anyway (see Intel SDM -- INVPCID
// type), so it doesn't hurt us that one can't supply a 64-bit value here.
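// The MOV32rr copy below doubles as a zero-extension: writing a 32-bit
// register clears bits 63:32, so SUBREG_TO_REG can assert that the upper
// half of the 64-bit operand is zero.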
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
(INVPCID64
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
addr:$src2)>;
}
//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
} // Uses, Defs
} // SchedRW
//===----------------------------------------------------------------------===//
// TS flag control instruction.
let SchedRW = [WriteSystem] in {
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
}
//===----------------------------------------------------------------------===//
// IF (inside EFLAGS) management instructions.
let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
}
//===----------------------------------------------------------------------===//
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
"rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS,
Requires<[Not64BitMode, HasRDPID]>;
def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS,
Requires<[In64BitMode, HasRDPID]>;
} // SchedRW
let Predicates = [In64BitMode, HasRDPID] in {
// Due to silly instruction definition, we have to compensate for the
// instruction outputting a 64-bit register.
def : Pat<(int_x86_rdpid),
(EXTRACT_SUBREG (RDPID64), sub_32bit)>;
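// This is safe because rdpid reads IA32_TSC_AUX, whose defined value fits in
// 32 bits, so taking the low subregister of the 64-bit result loses nothing.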
}
//===----------------------------------------------------------------------===//
// PTWRITE Instruction - Write Data to a Processor Trace Packet
let SchedRW = [WriteSystem] in {
def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Platform Configuration instruction
// From ISA docs:
// "This instruction is used to execute functions for configuring platform
// features.
// EAX: Leaf function to be invoked.
// RBX/RCX/RDX: Leaf-specific purpose."
// "Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF,
// AF, OF, and SF are cleared. In case of failure, the failure reason is
// indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared."
// Thus all these mentioned registers are considered clobbered.
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
Requires<[HasPCONFIG]>;
} // SchedRW

View File

@@ -0,0 +1,60 @@
//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel TSX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TSX instructions
def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
let SchedRW = [WriteSystem] in {
//let usesCustomInserter = 1 in
//def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
// "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
// Requires<[HasRTM]>;
let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
"xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
"xbegin\t$dst", []>, OpSize32;
}
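// The branch target is the fallback path: on a transaction abort the
// processor resumes there with the abort status in EAX, hence Defs = [EAX].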
// Pseudo instruction to fake the definition of EAX on the fallback code path.
//let isPseudo = 1, Defs = [EAX] in {
//def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
//}
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
"xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW
// HLE prefixes
let SchedRW = [WriteSystem] in {
let isAsmParserOnly = 1 in {
def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
}
} // SchedRW

View File

@@ -0,0 +1,88 @@
//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// VMX instructions
let SchedRW = [WriteSystem] in {
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, PD;
// 0F 01 D4
def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, PS;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayStore
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayLoad
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
"vmxon\t$vmxon", []>, XS;
} // SchedRW

View File

@@ -0,0 +1,511 @@
//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// No op bitconverts
//===----------------------------------------------------------------------===//
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(COPY_TO_REGCLASS FR64:$src, VR128)>;
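// FR32 and FR64 alias the low lanes of the same XMM registers that back
// VR128, so the four patterns above are pure register-class copies and emit
// no code.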
//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//
// Patterns for insert_subvector/extract_subvector to/from index=0
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT,
SubRegIndex subIdx> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}
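// For illustration, the v4i32/v8i32 instantiation below expands to:
//   (extract_subvector (v8i32 VR256:$src), 0) -> (EXTRACT_SUBREG $src, sub_xmm)
//   (insert_subvector undef, (v4i32 $src), 0)
//     -> (INSERT_SUBREG (IMPLICIT_DEF), $src, sub_xmm)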
// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32, VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32, VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(alignedstore (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
def : Pat<(store (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
}
let Predicates = [HasAVX, NoVLX] in {
defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of lower 128-bits
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
v32i8, sub_xmm>;
// Special patterns for storing subvector extracts of lower 128-bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
v8i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
v16i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
v32i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
v64i8, sub_xmm>;
// Special patterns for storing subvector extracts of lower 256-bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
sub_ymm>;
defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
v8i64, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
v16i32, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
v32i16, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
v64i8, sub_ymm>;
}
// If we're inserting into an all zeros vector, just use a plain move which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, ValueType ZeroTy,
SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
(SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}
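// This is sound because VEX/EVEX-encoded 128/256-bit register moves zero the
// destination's upper bits, so VMOV*rr plus SUBREG_TO_REG already yields the
// subvector inserted on top of an all-zeros vector.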
let Predicates = [HasAVX, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}
let Predicates = [HasVLX] in {
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
return isMaskZeroExtended(N);
}]>;
def maskzeroupperv1i1 : maskzeroupper<v1i1, VK1>;
def maskzeroupperv2i1 : maskzeroupper<v2i1, VK2>;
def maskzeroupperv4i1 : maskzeroupper<v4i1, VK4>;
def maskzeroupperv8i1 : maskzeroupper<v8i1, VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;
// The patterns determine if we can depend on the upper bits of a mask register
// being zeroed by the previous operation so that we can skip explicit
// zeroing.
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv32i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK32:$src, VK64)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK16)>;
}
let Predicates = [HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
let Predicates = [HasVLX, HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK8)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK8)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK16)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK64)>;
}
// If the upper bits are not known to be zero, we have to fall back to
// explicit zeroing using shifts.
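// For example, placing a v2i1 mask into a zeroed v16i1 shifts the mask left
// by 14 and logically back right by 14, clearing bits 15:2 while preserving
// bits 1:0.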
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
(i8 15)), (i8 15))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
(i8 14)), (i8 14))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
(i8 12)), (i8 12))>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
(i8 8)), (i8 8))>;
}
let Predicates = [HasDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
(i8 7)), (i8 7))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
(i8 6)), (i8 6))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
(i8 4)), (i8 4))>;
}
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v32i1 VK32:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}
let Predicates = [HasBWI, NoDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
(i8 24)), (i8 24))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
(i8 56)), (i8 56))>;
}
let Predicates = [HasBWI, HasDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
(i8 31)), (i8 31))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
(i8 30)), (i8 30))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
(i8 28)), (i8 28))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
(i8 63)), (i8 63))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
(i8 62)), (i8 62))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}

View File

@@ -0,0 +1,446 @@
//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes XOP (eXtended OPerations)
//
//===----------------------------------------------------------------------===//
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
}
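// Each defm above instantiates both multiclass variants, e.g. VPHSUBWD
// yields VPHSUBWDrr (register source) and VPHSUBWDrm (i128mem source).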
// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
Operand memop, ComplexPattern mem_cpat,
X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
SchedWriteFRnd.XMM>;
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
SchedWriteFRnd.XMM>;
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
SchedWriteFRnd.YMM>;
}
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
(vt128 VR128:$src2))))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rr">;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}
multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
SchedWriteVecShiftImm.XMM>;
defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
SchedWriteVecShiftImm.XMM>;
defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
SchedWriteVecShiftImm.XMM>;
defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
SchedWriteVecShiftImm.XMM>;
}
// Instructions where the second source can be memory, but the third must be a register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
Sched<[sched]>;
def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd",
int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
defm VPMACSWW : xop4opm2<0x95, "vpmacsww",
int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
defm VPMACSWD : xop4opm2<0x96, "vpmacswd",
int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
defm VPMACSSWW : xop4opm2<0x85, "vpmacssww",
int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd",
int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd",
int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql",
int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh",
int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd",
int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}
// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v8i16 VR128:$src3))),
(VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
(bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
(v2i64 VR128:$src3))),
(VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
(v2i64 VR128:$src3))),
(VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}
// Transform to swizzle the immediate so that a pattern with the memory
// operand in the first source position can still be matched.
def CommuteVPCOMCC : SDNodeXForm<imm, [{
uint8_t Imm = N->getZExtValue() & 0x7;
Imm = X86::getSwappedVPCOMImm(Imm);
return getI8Imm(Imm, SDLoc(N));
}]>;
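// Illustrative example (wording assumed, not from the source): if DAG
// matching sees (X86vpcom (load addr), VR128:$src1, LT), the load sits in
// the wrong position for the "mi" form, so the pattern at the end of the
// multiclass below swaps the two sources and uses CommuteVPCOMCC to turn LT
// into GT, which preserves the comparison result.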
// Instructions where the second source can be memory and the third must be an imm8
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
imm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))),
imm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
NotMemoryFoldable;
}
}
def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
}
defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>;
defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>;
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// i128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rrr">;
}
let ExeDomain = SSEPackedInt in {
defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8,
SchedWriteVarShuffle.XMM>;
}
// Instructions where either the second or the third source can be memory
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rrr">;
}
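// Note (added for clarity): the (or (and $src3, $src1), (X86andnp $src3,
// $src2)) pattern above is a per-bit select. Wherever the selector $src3 has
// a 1 bit the result bit comes from $src1; wherever it has a 0 bit the
// result bit comes from $src2, which is exactly VPCMOV's behavior.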
let ExeDomain = SSEPackedInt in {
defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64,
SchedWriteShuffle.XMM>;
defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64,
SchedWriteShuffle.YMM>, VEX_L;
}
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
X86FoldableSchedWrite sched> {
def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched.Folded, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, Sched<[sched]>, FoldGenData<NAME#"rr">;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
v2f64, loadv2f64, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
v4f64, loadv4f64, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
v4f32, loadv4f32, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
v8f32, loadv8f32, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}


@@ -0,0 +1,77 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the available hardware counters for various subtargets.
//
//===----------------------------------------------------------------------===//
let SchedModel = SandyBridgeModel in {
def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
def SBPort23Counter : PfmIssueCounter<SBPort23,
["uops_dispatched_port:port_2",
"uops_dispatched_port:port_3"]>;
def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
}
let SchedModel = HaswellModel in {
def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BroadwellModel in {
def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
}
let SchedModel = SkylakeClientModel in {
def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = SkylakeServerModel in {
def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BtVer2Model in {
def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
}


@@ -0,0 +1,17 @@
//=- X86RegisterBank.td - Describe the X86 Banks ---------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 register banks.
//
//===----------------------------------------------------------------------===//
/// General Purpose Registers: RAX, RCX,...
def GPRRegBank : RegisterBank<"GPR", [GR64]>;
/// Floating Point/Vector Registers
def VECRRegBank : RegisterBank<"VECR", [VR512]>;


@@ -0,0 +1,591 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
// aliases between the registers, and the register classes built out of the
// registers.
//
//===----------------------------------------------------------------------===//
class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
let Namespace = "X86";
let HWEncoding = Enc;
let SubRegs = subregs;
}
// Subregister indices.
let Namespace = "X86" in {
def sub_8bit : SubRegIndex<8>;
def sub_8bit_hi : SubRegIndex<8, 8>;
def sub_8bit_hi_phony : SubRegIndex<8, 8>;
def sub_16bit : SubRegIndex<16>;
def sub_16bit_hi : SubRegIndex<16, 16>;
def sub_32bit : SubRegIndex<32>;
def sub_xmm : SubRegIndex<128>;
def sub_ymm : SubRegIndex<256>;
}
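// For example (explanatory note): sub_8bit_hi is an 8-bit index at bit
// offset 8, so applied to AX (defined below) it selects AH, while sub_8bit
// selects AL.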
//===----------------------------------------------------------------------===//
// Register definitions...
//
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
// Dwarf numbering is different for 32-bit and 64-bit, and there are
// variations by target as well. Currently the first entry is for X86-64, the
// second for EH on X86-32/Darwin, and the third is the 'generic' one
// (X86-32/Linux and debug information on X86-32/Darwin).
// 8-bit registers
// Low registers
def AL : X86Reg<"al", 0>;
def DL : X86Reg<"dl", 2>;
def CL : X86Reg<"cl", 1>;
def BL : X86Reg<"bl", 3>;
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
def AH : X86Reg<"ah", 4>;
def DH : X86Reg<"dh", 6>;
def CH : X86Reg<"ch", 5>;
def BH : X86Reg<"bh", 7>;
// X86-64 only, requires REX.
let CostPerUse = 1 in {
def SIL : X86Reg<"sil", 6>;
def DIL : X86Reg<"dil", 7>;
def BPL : X86Reg<"bpl", 5>;
def SPL : X86Reg<"spl", 4>;
def R8B : X86Reg<"r8b", 8>;
def R9B : X86Reg<"r9b", 9>;
def R10B : X86Reg<"r10b", 10>;
def R11B : X86Reg<"r11b", 11>;
def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
}
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
def SIH : X86Reg<"", -1>;
def DIH : X86Reg<"", -1>;
def BPH : X86Reg<"", -1>;
def SPH : X86Reg<"", -1>;
def R8BH : X86Reg<"", -1>;
def R9BH : X86Reg<"", -1>;
def R10BH : X86Reg<"", -1>;
def R11BH : X86Reg<"", -1>;
def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
def HCX : X86Reg<"", -1>;
def HBX : X86Reg<"", -1>;
def HSI : X86Reg<"", -1>;
def HDI : X86Reg<"", -1>;
def HBP : X86Reg<"", -1>;
def HSP : X86Reg<"", -1>;
def HIP : X86Reg<"", -1>;
def R8WH : X86Reg<"", -1>;
def R9WH : X86Reg<"", -1>;
def R10WH : X86Reg<"", -1>;
def R11WH : X86Reg<"", -1>;
def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
}
// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : X86Reg<"ax", 0, [AL,AH]>;
def DX : X86Reg<"dx", 2, [DL,DH]>;
def CX : X86Reg<"cx", 1, [CL,CH]>;
def BX : X86Reg<"bx", 3, [BL,BH]>;
}
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
def SI : X86Reg<"si", 6, [SIL,SIH]>;
def DI : X86Reg<"di", 7, [DIL,DIH]>;
def BP : X86Reg<"bp", 5, [BPL,BPH]>;
def SP : X86Reg<"sp", 4, [SPL,SPH]>;
}
def IP : X86Reg<"ip", 0>;
// X86-64 only, requires REX.
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8W : X86Reg<"r8w", 8, [R8B,R8BH]>;
def R9W : X86Reg<"r9w", 9, [R9B,R9BH]>;
def R10W : X86Reg<"r10w", 10, [R10B,R10BH]>;
def R11W : X86Reg<"r11w", 11, [R11B,R11BH]>;
def R12W : X86Reg<"r12w", 12, [R12B,R12BH]>;
def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
def EAX : X86Reg<"eax", 0, [AX, HAX]>, DwarfRegNum<[-2, 0, 0]>;
def EDX : X86Reg<"edx", 2, [DX, HDX]>, DwarfRegNum<[-2, 2, 2]>;
def ECX : X86Reg<"ecx", 1, [CX, HCX]>, DwarfRegNum<[-2, 1, 1]>;
def EBX : X86Reg<"ebx", 3, [BX, HBX]>, DwarfRegNum<[-2, 3, 3]>;
def ESI : X86Reg<"esi", 6, [SI, HSI]>, DwarfRegNum<[-2, 6, 6]>;
def EDI : X86Reg<"edi", 7, [DI, HDI]>, DwarfRegNum<[-2, 7, 7]>;
def EBP : X86Reg<"ebp", 5, [BP, HBP]>, DwarfRegNum<[-2, 4, 5]>;
def ESP : X86Reg<"esp", 4, [SP, HSP]>, DwarfRegNum<[-2, 5, 4]>;
def EIP : X86Reg<"eip", 0, [IP, HIP]>, DwarfRegNum<[-2, 8, 8]>;
}
// X86-64 only, requires REX
let SubRegIndices = [sub_16bit, sub_16bit_hi], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8D : X86Reg<"r8d", 8, [R8W,R8WH]>;
def R9D : X86Reg<"r9d", 9, [R9W,R9WH]>;
def R10D : X86Reg<"r10d", 10, [R10W,R10WH]>;
def R11D : X86Reg<"r11d", 11, [R11W,R11WH]>;
def R12D : X86Reg<"r12d", 12, [R12W,R12WH]>;
def R13D : X86Reg<"r13d", 13, [R13W,R13WH]>;
def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;
// These also require REX.
let CostPerUse = 1 in {
def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>;
def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>;
def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
}}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;
// Pseudo Floating Point registers
def FP0 : X86Reg<"fp0", 0>;
def FP1 : X86Reg<"fp1", 0>;
def FP2 : X86Reg<"fp2", 0>;
def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
def FP7 : X86Reg<"fp7", 0>;
// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
let CostPerUse = 1 in {
def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>;
def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>;
def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse
// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
foreach Index = 0-31 in {
def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// ZMM Registers, used by AVX-512 instructions.
let SubRegIndices = [sub_ymm] in {
foreach Index = 0-31 in {
def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// Mask Registers, used by AVX-512 instructions.
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>;
def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>;
def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>;
def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>;
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
// Status flags register.
//
// Note that some flags that are commonly thought of as part of the status
// flags register are modeled separately. Typically this is due to instructions
// reading and updating those flags independently of all the others. We don't
// want to create false dependencies between these instructions and so we use
// a separate register to model them.
def EFLAGS : X86Reg<"flags", 0>;
// The direction flag.
def DF : X86Reg<"dirflag", 0>;
// Segment registers
def CS : X86Reg<"cs", 1>;
def DS : X86Reg<"ds", 3>;
def SS : X86Reg<"ss", 2>;
def ES : X86Reg<"es", 0>;
def FS : X86Reg<"fs", 4>;
def GS : X86Reg<"gs", 5>;
// Debug registers
def DR0 : X86Reg<"dr0", 0>;
def DR1 : X86Reg<"dr1", 1>;
def DR2 : X86Reg<"dr2", 2>;
def DR3 : X86Reg<"dr3", 3>;
def DR4 : X86Reg<"dr4", 4>;
def DR5 : X86Reg<"dr5", 5>;
def DR6 : X86Reg<"dr6", 6>;
def DR7 : X86Reg<"dr7", 7>;
def DR8 : X86Reg<"dr8", 8>;
def DR9 : X86Reg<"dr9", 9>;
def DR10 : X86Reg<"dr10", 10>;
def DR11 : X86Reg<"dr11", 11>;
def DR12 : X86Reg<"dr12", 12>;
def DR13 : X86Reg<"dr13", 13>;
def DR14 : X86Reg<"dr14", 14>;
def DR15 : X86Reg<"dr15", 15>;
// Control registers
def CR0 : X86Reg<"cr0", 0>;
def CR1 : X86Reg<"cr1", 1>;
def CR2 : X86Reg<"cr2", 2>;
def CR3 : X86Reg<"cr3", 3>;
def CR4 : X86Reg<"cr4", 4>;
def CR5 : X86Reg<"cr5", 5>;
def CR6 : X86Reg<"cr6", 6>;
def CR7 : X86Reg<"cr7", 7>;
def CR8 : X86Reg<"cr8", 8>;
def CR9 : X86Reg<"cr9", 9>;
def CR10 : X86Reg<"cr10", 10>;
def CR11 : X86Reg<"cr11", 11>;
def CR12 : X86Reg<"cr12", 12>;
def CR13 : X86Reg<"cr13", 13>;
def CR14 : X86Reg<"cr14", 14>;
def CR15 : X86Reg<"cr15", 15>;
// Pseudo index registers
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
// Bound registers, used in MPX instructions
def BND0 : X86Reg<"bnd0", 0>;
def BND1 : X86Reg<"bnd1", 1>;
def BND2 : X86Reg<"bnd2", 2>;
def BND3 : X86Reg<"bnd3", 3>;
// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
//
// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL, and
// R8B, ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
// 64-bit mode. The main complication is that they cannot be encoded in an
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
let isAllocatable = 0 in
def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
R12BH, R13BH, R14BH, R15BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
(add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
R15WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 15)>;
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
R8, R9, R10, R11, RIP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH)> {
let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP)>;
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
// GR64_NOSP - GR64 registers except RSP (and RIP).
def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
(and GR32_NOREX, GR32_NOSP)>;
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86-64 ISA.
// In such cases, it is fine to use RIP as we are sure the high 32
// bits are not set. We do not need variants for NOSP as RIP is not
// allowed there.
// RIP is not spilled anywhere for now, so stick to 32-bit alignment
// to save on memory space.
// FIXME: We could allow all 64-bit registers, but we would need
// something to check that the high 32 bits are not set,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
// When RBP is used as a base pointer in a 32-bit addressing environment,
// it is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to 32-bit alignment to save
// on memory consumption.
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
// values as 64-bit quantities instead of 80-bit quantities, which is much much
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Special classes that help the assembly parser choose some alternate
// instructions to favor 2-byte VEX encodings.
def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 0, 7)>;
def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 8, 15)>;
def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 7)>;
def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 8, 15)>;
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,49 @@
//===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines scheduling predicate definitions that are common to
// all X86 subtargets.
//
//===----------------------------------------------------------------------===//
// A predicate used to identify dependency-breaking instructions that clear the
// content of the destination register. Note that this predicate only checks if
// input registers are the same. This predicate doesn't make any assumptions on
// the expected instruction opcodes, because different processors may implement
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
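// For example, in "pxor %xmm1, %xmm1" (PXORrr) operands 1 and 2 are both
// %xmm1, so CheckSameRegOperand<1, 2> holds and the instruction can be
// treated as a dependency-breaking zero idiom on processors that implement
// that idiom.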
// A predicate used to check if an instruction is a LEA, and if it uses all
// three source operands: base, index, and offset.
def IsThreeOperandsLEAPredicate: CheckAll<[
CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
// isRegOperand(Base)
CheckIsRegOperand<1>,
CheckNot<CheckInvalidRegOperand<1>>,
// isRegOperand(Index)
CheckIsRegOperand<3>,
CheckNot<CheckInvalidRegOperand<3>>,
// hasLEAOffset(Offset)
CheckAny<[
CheckAll<[
CheckIsImmOperand<4>,
CheckNot<CheckZeroOperand<4>>
]>,
CheckNonPortable<"MI.getOperand(4).isGlobal()">
]>
]>;
// This predicate evaluates to true only if the input machine instruction is a
// three-operand LEA. Tablegen automatically generates a new method for it in
// X86GenInstrInfo.
def IsThreeOperandsLEAFn :
TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
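// Rough sketch (assumed shape, not verbatim) of what tablegen emits for the
// predicate above into X86GenInstrInfo:
//   bool isThreeOperandsLEA(const MachineInstr &MI) {
//     return (MI.getOpcode() == X86::LEA32r || /* ...other LEA opcodes... */) &&
//            MI.getOperand(1).isReg() && MI.getOperand(1).getReg() != 0 &&
//            MI.getOperand(3).isReg() && MI.getOperand(3).getReg() != 0 &&
//            ((MI.getOperand(4).isImm() && MI.getOperand(4).getImm() != 0) ||
//             MI.getOperand(4).isGlobal());
//   }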

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,661 @@
//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// InstrSchedModel annotations for out-of-order CPUs.
// Instructions with folded loads need to read the memory operand immediately,
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;
// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res, int UOps> {
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
}
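// Illustrative use (the processor model and numbers below are assumed, not
// taken from this file): give WriteALU a latency of 1 cycle, one resource
// cycle on a single port, and a single micro-op.
//   defm : X86WriteRes<WriteALU, [SBPort015], 1, [1], 1>;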
// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: with and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
// with a folded load.
class X86FoldableSchedWrite : SchedWrite {
// The SchedWrite to use when a load is folded into the instruction.
SchedWrite Folded;
}
// Multiclass that produces a linked pair of SchedWrites.
multiclass X86SchedWritePair {
// Register-Memory operation.
def Ld : SchedWrite;
// Register-Register operation.
def NAME : X86FoldableSchedWrite {
let Folded = !cast<SchedWrite>(NAME#"Ld");
}
}
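// For example, "defm WriteALU : X86SchedWritePair;" (see below) produces the
// register-register SchedWrite "WriteALU" together with its folded-load
// variant "WriteALULd", linked through the Folded field.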
// Helpers to mark SchedWrites as unsupported.
multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
let Unsupported = 1 in {
def : WriteRes<SchedRW, []>;
}
}
multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
let Unsupported = 1 in {
def : WriteRes<SchedRW, []>;
def : WriteRes<SchedRW.Folded, []>;
}
}
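// Illustrative use (assumed, as a scheduler model without AVX-512 might do):
//   defm : X86WriteResPairUnsupported<WriteFAddZ>;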
// Class that wraps an X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
X86FoldableSchedWrite s128,
X86FoldableSchedWrite s256,
X86FoldableSchedWrite s512> {
X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
X86FoldableSchedWrite MMX = sScl; // MMX operations.
X86FoldableSchedWrite XMM = s128; // XMM operations.
X86FoldableSchedWrite YMM = s256; // YMM operations.
X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}
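// Illustrative instantiation (assumed here; the concrete defs live further
// down in this file): bundle the scalar, XMM, YMM, and ZMM add writes into a
// single handle that instruction definitions can index by width.
//   def SchedWriteFAdd : X86SchedWriteWidths<WriteFAdd, WriteFAddX,
//                                            WriteFAddY, WriteFAddZ>;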
// Class that wraps X86SchedWriteWidths for each fp vector type.
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
X86SchedWriteWidths sPD> {
X86SchedWriteWidths PS = sPS;
X86SchedWriteWidths PD = sPD;
}
// Class that wraps a move/load/store triple for a vector width.
class X86SchedWriteMoveLS<SchedWrite MoveRR,
SchedWrite LoadRM,
SchedWrite StoreMR> {
SchedWrite RR = MoveRR;
SchedWrite RM = LoadRM;
SchedWrite MR = StoreMR;
}
// Class that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
X86SchedWriteMoveLS s128,
X86SchedWriteMoveLS s256,
X86SchedWriteMoveLS s512> {
X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
X86SchedWriteMoveLS MMX = sScl; // MMX operations.
X86SchedWriteMoveLS XMM = s128; // XMM operations.
X86SchedWriteMoveLS YMM = s256; // YMM operations.
X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
}
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
defm WriteDiv16 : X86SchedWritePair;
defm WriteDiv32 : X86SchedWritePair;
defm WriteDiv64 : X86SchedWritePair;
defm WriteIDiv8 : X86SchedWritePair;
defm WriteIDiv16 : X86SchedWritePair;
defm WriteIDiv32 : X86SchedWritePair;
defm WriteIDiv64 : X86SchedWritePair;
defm WriteBSF : X86SchedWritePair; // Bit scan forward.
defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV : X86SchedWritePair; // Conditional move.
defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def WriteZero : SchedWrite;
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
def WriteFLD0 : SchedWrite;
def WriteFLD1 : SchedWrite;
def WriteFLDC : SchedWrite;
def WriteFLoad : SchedWrite;
def WriteFLoadX : SchedWrite;
def WriteFLoadY : SchedWrite;
def WriteFMaskedLoad : SchedWrite;
def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair;
defm WriteFHAddZ : X86SchedWritePair;
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddX : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair;
defm WritePHAddZ : X86SchedWritePair;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
def WriteVecLoadNT : SchedWrite;
def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;
def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair;
// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;
// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;
// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;
// Fence instructions.
def WriteFence : SchedWrite;
// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;
// Move/Load/Store wrappers.
def WriteFMoveLS
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
def WriteFMoveLSX
: X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
def WriteFMoveLSNTX
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
def SchedWriteFMoveLSNT
: X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
WriteFMoveLSNTY, WriteFMoveLSNTY>;
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
: X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
: X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
: X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
: X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
: X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
: X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
WriteFSqrtY, WriteFSqrtZ>;
def SchedWriteFSqrt64
: X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
: X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendZ>;
def SchedWriteCvtDQ2PD
: X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
: X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
: X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
: X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
WriteCvtPD2PSY, WriteCvtPD2PSZ>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
: X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
: X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWZ>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlendY, WriteVarBlendZ>;
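// Illustrative use: instruction definitions take one of these wrappers and
// select the field matching their vector width, e.g. Sched<[SchedWriteFAdd.XMM]>
// for a 128-bit packed add and Sched<[SchedWriteFAdd.YMM]> for its 256-bit
// form (assuming the Scl/XMM/YMM/ZMM fields of X86SchedWriteWidths).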
// Vector size wrappers.
def SchedWriteFAddSizes
: X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
: X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
: X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
: X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
: X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
: X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
: X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
// IssueWidth is analogous to the number of decode units. Core and its
// descendants, including Nehalem and SandyBridge, have 4 decoders.
// Resources beyond the decoder operate on micro-ops and are buffered
// so adjacent micro-ops don't directly compete.
//
// MicroOpBufferSize > 1 indicates that RAW dependencies can be
// decoded in the same cycle. The value 32 is a reasonable but arbitrary
// number of in-flight instructions.
//
// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
// indicates high latency opcodes. Alternatively, InstrItinData
// entries may be included here to define specific operand
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
// The GenericX86Model contains no instruction schedules
// and disables PostRAScheduler.
class GenericX86Model : SchedMachineModel {
let IssueWidth = 4;
let MicroOpBufferSize = 32;
let LoadLatency = 4;
let HighLatency = 10;
let PostRAScheduler = 0;
let CompleteModel = 0;
}
def GenericModel : GenericX86Model;
// Define a model with the PostRAScheduler enabled.
def GenericPostRAModel : GenericX86Model {
let PostRAScheduler = 1;
}
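// Illustrative (hypothetical) use from a CPU definition elsewhere:
//   def : ProcessorModel<"some-cpu", GenericPostRAModel, [FeatureX87]>;
// With CompleteModel = 0, the scheduler falls back to these defaults for any
// opcode that has no explicit schedule.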

View File

@@ -0,0 +1,917 @@
//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the schedule class data for the Intel Atom
// in-order (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//
//
// Scheduling information derived from the "Intel 64 and IA-32 Architectures
// Optimization Reference Manual", Chapter 13, Section 4.
// Atom machine model.
def AtomModel : SchedMachineModel {
let IssueWidth = 2; // Allows 2 instructions per scheduling group.
let MicroOpBufferSize = 0; // In-order execution, always hide latency.
let LoadLatency = 3; // Expected cycles, may be overridden.
let HighLatency = 30; // Expected, may be overridden.
// On the Atom, the throughput for taken branches is 2 cycles. For small
// simple loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
let PostRAScheduler = 1;
let CompleteModel = 0;
}
let SchedModel = AtomModel in {
// Functional Units
def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
// SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
// SIMD/FP: SIMD ALU, FP Adder
def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
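// For example, in "addw (%rsp), %ax" the AX input feeds the ALU stage after
// the load, so its value is only needed 3 cycles after the load's address
// operands; the ReadAdvance models that overlap.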
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> RRPorts,
list<ProcResourceKind> RMPorts,
int RRLat = 1, int RMLat = 1,
list<int> RRRes = [1],
list<int> RMRes = [1]> {
// Register variant: RRLat cycles on RRPorts.
def : WriteRes<SchedRW, RRPorts> {
let Latency = RRLat;
let ResourceCycles = RRRes;
}
// Memory variant: RMLat cycles on RMPorts.
def : WriteRes<SchedRW.Folded, RMPorts> {
let Latency = RMLat;
let ResourceCycles = RMRes;
}
}
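// For example,
//   defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
// (defined under Arithmetic below) expands, with the default latencies, to a
// 1-cycle WriteRes on AtomPort01 for the register form and a 1-cycle
// WriteRes<WriteALU.Folded> on AtomPort0 for the load-folded form.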
// A folded store needs a cycle on Port0 for the store data.
def : WriteRes<WriteRMW, [AtomPort0]>;
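// For illustration (as modeled in the instruction definition files), a
// memory-destination ALU op such as "addl %eax, (%rbx)" combines the
// load-folded ALU write with this WriteRMW for the store-back.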
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : X86WriteResPairUnsupported<WriteCRC32>;
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [AtomPort01]>;
def : WriteRes<WriteSETCCStore, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : WriteRes<WriteBitTest,[AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
IMUL64rmi8, IMUL64rmi32)>;
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [AtomPort0]>;
def : WriteRes<WriteStore, [AtomPort0]>;
def : WriteRes<WriteStoreNT, [AtomPort0]>;
def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteFence, [AtomPort0]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [AtomPort01]>;
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [AtomPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [AtomPort01], 6, [6], 1>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFLoadY>;
defm : X86WriteResUnsupported<WriteFMaskedLoad>;
defm : X86WriteResUnsupported<WriteFMaskedLoadY>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
defm : X86WriteResUnsupported<WriteFMaskedStore>;
defm : X86WriteResUnsupported<WriteFMaskedStoreY>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadY>;
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadNTY>;
defm : X86WriteResUnsupported<WriteVecMaskedLoad>;
defm : X86WriteResUnsupported<WriteVecMaskedLoadY>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : X86WriteResPairUnsupported<WritePMULLD>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : X86WriteResPairUnsupported<WritePHMINPOS>;
defm : X86WriteResPairUnsupported<WriteMPSAD>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
def : WriteRes<WriteVecExtract, [AtomPort0]>;
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WritePCmpIStrI>;
defm : X86WriteResPairUnsupported<WritePCmpIStrM>;
defm : X86WriteResPairUnsupported<WritePCmpEStrI>;
defm : X86WriteResPairUnsupported<WritePCmpEStrM>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
////////////////////////////////////////////////////////////////////////////////
// AES instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteAESIMC>;
defm : X86WriteResPairUnsupported<WriteAESKeyGen>;
defm : X86WriteResPairUnsupported<WriteAESDecEnc>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteCLMul>;
////////////////////////////////////////////////////////////////////////////////
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }
////////////////////////////////////////////////////////////////////////////////
// Special Cases.
////////////////////////////////////////////////////////////////////////////////
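// The AtomWrite<ports>_<latency> helpers below give per-instruction overrides:
// each SchedWriteRes fixes the port(s) and latency, and an InstRW binds it to
// an instruction list or regex.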
// Port0
def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr,
MOVSX64rr32)>;
def : SchedAlias<WriteALURMW, AtomWrite0_1>;
def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
"BT(C|R|S)?(16|32|64)(rr|ri8)")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
// Port0 and Port1
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1, 1];
}
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
POP16rmr, POP32rmr, POP64rmr,
PUSH16r, PUSH32r, PUSH64r,
PUSHi16, PUSHi32,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
"IRET(16|32|64)?")>;
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5, 5];
}
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
MOVSSrr, MOVSSrr_REV,
PSLLDQri, PSRLDQri)>;
def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
"MMX_PUNPCKH(BW|DQ|WD)irr")>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
"XCHG(8|16|32|64)(ar|rr)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
let ResourceCycles = [3];
}
def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
CMPSB, CMPSL, CMPSQ, CMPSW,
MOVSB, MOVSL, MOVSQ, MOVSW,
POP16rmm, POP32rmm, POP64rmm)>;
def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
"XCHG(8|16|32|64)rm",
"PH(ADD|SUB)Drr",
"MOV(S|Z)X16rm8",
"MMX_P(ADD|SUB)Qirm",
"MOV(UPS|UPD|DQU)rm",
"P(ADD|SUB)Qrm")>;
def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
let Latency = 4;
let ResourceCycles = [4];
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;
def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rrCL, SHRD16rrCL,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
"IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
let Latency = 7;
let ResourceCycles = [7];
}
def : InstRW<[AtomWrite01_7], (instrs AAD8i8)>;
def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWrite01_8], (instrs LOOPE,
PUSHA16, PUSHA32,
SHLD64rrCL, SHRD64rrCL,
FNSTCW16m)>;
def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
"(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
"CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
let Latency = 15;
let ResourceCycles = [15];
}
def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
let Latency = 18;
let ResourceCycles = [18];
}
def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
let Latency = 20;
let ResourceCycles = [20];
}
def : InstRW<[AtomWrite01_20], (instrs DAS)>;
def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
let Latency = 22;
let ResourceCycles = [22];
}
def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
let Latency = 23;
let ResourceCycles = [23];
}
def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
let Latency = 25;
let ResourceCycles = [25];
}
def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
let Latency = 26;
let ResourceCycles = [26];
}
def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
let Latency = 29;
let ResourceCycles = [29];
}
def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
let Latency = 30;
let ResourceCycles = [30];
}
def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
let Latency = 32;
let ResourceCycles = [32];
}
def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
let ResourceCycles = [45];
}
def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
let ResourceCycles = [46];
}
def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
let Latency = 48;
let ResourceCycles = [48];
}
def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
let Latency = 55;
let ResourceCycles = [55];
}
def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
let Latency = 59;
let ResourceCycles = [59];
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
let ResourceCycles = [63];
}
def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
let Latency = 68;
let ResourceCycles = [68];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
let ResourceCycles = [71];
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
INVLPG, INVLPGA32, INVLPGA64)>;
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
let Latency = 72;
let ResourceCycles = [72];
}
def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
let Latency = 74;
let ResourceCycles = [74];
}
def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
let Latency = 77;
let ResourceCycles = [77];
}
def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
let Latency = 78;
let ResourceCycles = [78];
}
def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
let ResourceCycles = [79];
}
def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
"LRETI?(L|Q|W)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
let ResourceCycles = [92];
}
def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
let Latency = 94;
let ResourceCycles = [94];
}
def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
let Latency = 99;
let ResourceCycles = [99];
}
def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
let Latency = 121;
let ResourceCycles = [121];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
let ResourceCycles = [127];
}
def : InstRW<[AtomWrite01_127], (instrs INT)>;
def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
let Latency = 130;
let ResourceCycles = [130];
}
def : InstRW<[AtomWrite01_130], (instrs INT3)>;
def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
let Latency = 140;
let ResourceCycles = [140];
}
def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
let Latency = 141;
let ResourceCycles = [141];
}
def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
let Latency = 146;
let ResourceCycles = [146];
}
def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
let Latency = 147;
let ResourceCycles = [147];
}
def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
let Latency = 168;
let ResourceCycles = [168];
}
def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
let Latency = 174;
let ResourceCycles = [174];
}
def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
let Latency = 183;
let ResourceCycles = [183];
}
def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
let Latency = 202;
let ResourceCycles = [202];
}
def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
} // SchedModel

View File

@@ -0,0 +1,682 @@
//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD btver2 (Jaguar) to support
// instruction scheduling and other instruction cost heuristics. Based on the
// AMD Software Optimization Guide for AMD Family 16h Processors and its
// instruction latency appendix.
//
//===----------------------------------------------------------------------===//
def BtVer2Model : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and btver2 can
// decode 2 instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 64; // Retire Control Unit
let LoadLatency = 5; // FPU load latency (worst case; integer loads take 3 cycles)
let HighLatency = 25;
let MispredictPenalty = 14; // Minimum branch misprediction penalty
let PostRAScheduler = 1;
// FIXME: SSE4/AVX is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = BtVer2Model in {
// Jaguar can issue up to 6 micro-ops in one cycle
def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handles FP->INT jam)
def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// The Integer PRF for Jaguar is 64 entries, and it holds the architectural and
// speculative versions of the 64-bit integer registers.
// Reference: www.realworldtech.com/jaguar/4/
//
// The processor always keeps the different parts of an integer register
// together. An instruction that writes to a part of a register will therefore
// have a false dependence on any previous write to the same register or any
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
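// For example, a write to AX carries a false dependence on an earlier write
// to EAX or RAX, because the renamer keeps all parts of a register together.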
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
def JFpuPRF : RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
// Reference: "Software Optimization Guide for AMD Family 16h Processors"
def JRCU : RetireControlUnit<64, 2>;
// Integer Pipe Scheduler
def JALU01 : ProcResGroup<[JALU0, JALU1]> {
let BufferSize=20;
}
// AGU Pipe Scheduler
def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
let BufferSize=12;
}
// Fpu Pipe Scheduler
def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
let BufferSize=18;
}
// Functional units
def JDiv : ProcResource<1>; // integer division
def JMul : ProcResource<1>; // integer multiplication
def JVALU0 : ProcResource<1>; // vector integer
def JVALU1 : ProcResource<1>; // vector integer
def JVIMUL : ProcResource<1>; // vector integer multiplication
def JSTC : ProcResource<1>; // vector store/convert
def JFPM : ProcResource<1>; // FP multiplication
def JFPA : ProcResource<1>; // FP addition
// Functional unit groups
def JFPX : ProcResGroup<[JFPA, JFPM]>;
def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// Register variant: Lat cycles on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
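// For example,
//   defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
// (defined under Arithmetic below) expands to a 1-cycle WriteRes on JALU01
// for the register form and a 4-cycle (1 + 3) WriteRes<WriteALU.Folded> on
// [JLAGU, JALU01] for the load-folded form.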
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// Register variant: Lat cycles on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
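// As a sketch (assuming a pair like the ones defined further below):
//   defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
// would give the register form 3 cycles on JFPU0/JFPA and the load-folded
// form 8 cycles (3 + 5 for the FP load), also occupying JLAGU for the load.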
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [2], int UOps = 2> {
// Register variant: Lat cycles on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
let NumMicroOps = UOps;
}
}
// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteIDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
// BMI1 BEXTR, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore, [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copied and pasted from WriteLoad/Store.
def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteSTMXCSR, [JSAGU]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteJump, [JALU01], 1>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : JWriteResFpuPair<WriteFDiv64, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : JWriteResFpuPair<WriteFSqrt64, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResFpuPair<WriteFSqrt64X, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResYMMPair<WriteFSqrt64Y, [JFPU1, JFPM], 54, [2, 54], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>;
defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecInsertLd, [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;
////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteINSERTQ : SchedWriteRes<[JFPU01, JVALU]> {
let Latency = 2;
let ResourceCycles = [1, 4];
}
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteVBROADCASTYLd : SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
def JWriteJVZEROALL : SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;
}
def : InstRW<[JWriteJVZEROALL], (instrs VZEROALL)>;
def JWriteJVZEROUPPER : SchedWriteRes<[]> {
let Latency = 46;
let NumMicroOps = 37;
}
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////
def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.
// Reference: Section 10.8 of the "Software Optimization Guide for AMD Family
// 15h Processors".
// Reference: Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// Section 21.8 [Dependency-breaking instructions].
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
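// Illustrative only: `xorl %eax, %eax` (XOR32rr with identical source
// registers) matches ZeroIdiomPredicate and is resolved at rename with zero
// latency, whereas `xorl %ecx, %eax` falls through to the normal WriteALU
// resources.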
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
}
// On Jaguar, a slow LEA is either a 3Ops LEA (base, index, offset), or an LEA
// with a `Scale` value different from 1.
def JSlowLEAPredicate : MCSchedPredicate<
CheckAny<[
// A 3-operand LEA (base, index, offset).
IsThreeOperandsLEAFn,
// An LEA with a "Scale" different from 1.
CheckAll<[
CheckIsImmOperand<2>,
CheckNot<CheckImmOperand<2, 1>>
]>
]>
>;
def JWriteLEA : SchedWriteVariant<[
SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
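// Illustrative only: `leal (%rax,%rcx,4), %edx` (scale != 1) and
// `leal 8(%rax,%rcx), %edx` (base + index + displacement) both take the slow
// JWrite3OpsLEA path, while `leal 8(%rax), %edx` keeps the fast WriteLEA path.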
def JSlowLEA16r : SchedWriteRes<[JALU01]> {
let Latency = 3;
let ResourceCycles = [4];
}
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
} // SchedModel


@@ -0,0 +1,486 @@
//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def SLMModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and SLM can decode 2
// instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 32; // Based on the reorder buffer.
let LoadLatency = 3;
let MispredictPenalty = 10;
let PostRAScheduler = 1;
// For small loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
// FIXME: SSE4 is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = SLMModel in {
// Silvermont has 5 reservation stations for micro-ops.
def SLM_IEC_RSV0 : ProcResource<1>;
def SLM_IEC_RSV1 : ProcResource<1>;
def SLM_FPC_RSV0 : ProcResource<1> { let BufferSize = 1; }
def SLM_FPC_RSV1 : ProcResource<1> { let BufferSize = 1; }
def SLM_MEC_RSV : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
def SLM_IEC_RSV01 : ProcResGroup<[SLM_IEC_RSV0, SLM_IEC_RSV1]>;
def SLM_FPC_RSV01 : ProcResGroup<[SLM_FPC_RSV0, SLM_FPC_RSV1]>;
def SLMDivider : ProcResource<1>;
def SLMFPMultiplier : ProcResource<1>;
def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
int LoadLat = 3> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
// the latency (default = 3).
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
let NumMicroOps = UOps;
}
}
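// For illustration only (assuming the usual WriteALU/WriteALULd pairing from
// the common X86 scheduling definitions), with the default LoadLat of 3,
//   defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
// expands to roughly:
//   def : WriteRes<WriteALU,   [SLM_IEC_RSV01]>              { let Latency = 1; }
//   def : WriteRes<WriteALULd, [SLM_MEC_RSV, SLM_IEC_RSV01]> { let Latency = 4; }
// Entries that feed the slow divider pass LoadLat explicitly (e.g. the
// WriteDiv* records below use LoadLat = 4).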
// A folded store needs a cycle on MEC_RSV for the store data, but it does not
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copied and pasted from WriteStore/Load.
def : WriteRes<WriteSTMXCSR, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLDMXCSR, [SLM_MEC_RSV]> { let Latency = 3; }
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME: Latency and NumMicroOps?
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
// Bit counts.
defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLDC, [SLM_FPC_RSV01], 1, [2], 2>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SLM_FPC_RSV0]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 13;
let ResourceCycles = [13, 1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SLM_FPC_RSV0]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 21;
let ResourceCycles = [21, 1];
}
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSKY, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESDecEncLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESIMC, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESIMCLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESKeyGen, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESKeyGenLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SLM_FPC_RSV0]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 10;
let ResourceCycles = [10, 1];
}
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence, [SLM_MEC_RSV]>;
def : WriteRes<WriteNop, []>;
// AVX/FMA is not supported on this architecture, but we should define the basic
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
} // SchedModel

File diff suppressed because it is too large


@@ -0,0 +1,459 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
"32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87", "HasX87", "true",
"Enable X87 float instructions">;
def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
"Enable NOPL instruction">;
def FeatureCMOV : SubtargetFeature<"cmov", "HasCMov", "true",
"Enable conditional move instructions">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
"Support fxsave/fxrestore instructions">;
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
"Support xsave instructions">;
def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
"Support xsaveopt instructions">;
def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
"Support xsavec instructions">;
def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
[FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
"Enable SSE3 instructions",
[FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
"Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
"Enable AVX instructions",
[FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
"Enable three-operand fused multiple-add",
[FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
"Enable AVX-512 Byte and Word Instructions",
[FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
"Enable AVX-512 further Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
"Enable Galois Field Arithmetic Instructions",
[FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
"Enable vpclmulqdq instructions",
[FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
"Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
"Promote selected AES instructions to AVX512/AVX registers",
[FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
"Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
"Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
// On recent X86 (port-bound) processors, it's preferable to combine into a single
// shuffle using a variable mask rather than multiple fixed shuffles.
def FeatureFastVariableShuffle
: SubtargetFeature<"fast-variable-shuffle",
"HasFastVariableShuffle",
"true", "Shuffles with variable masks are fast">;
// On some X86 processors, there is no performance hazard to writing only the
// lower parts of a YMM or ZMM register without clearing the upper part.
def FeatureFastPartialYMMorZMMWrite
: SubtargetFeature<"fast-partial-ymm-or-zmm-write",
"HasFastPartialYMMorZMMWrite",
"true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def FeatureFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
// The target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
// The target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REPMOVS{W,D,Q}.
def FeatureERMSB
: SubtargetFeature<
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
// Gather has been available since Haswell (the AVX2 feature set), so technically
// we can generate gathers on all AVX2 processors, but the overhead on HSW is
// high. The Skylake client processor has faster gathers than HSW, and its
// performance is similar to Skylake Server (AVX-512).
def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast.">;
def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
// Enable mitigation of some aspects of speculative execution related
// vulnerabilities by removing speculatable indirect branches. This disables
// jump-table formation, rewrites explicit `indirectbr` instructions into
// `switch` instructions, and uses a special construct called a "retpoline" to
// prevent speculation of the remaining indirect branches (indirect calls and
// tail calls).
def FeatureRetpoline
: SubtargetFeature<"retpoline", "UseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
"lowering them with a speculation blocking construct.">;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
"Enable retpoline, but with an externally provided thunk.",
[FeatureRetpoline]>;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "X86Schedule.td"
include "X86InstrInfo_reduce.td"
def X86InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Variant name.
string Name = "att";
// Discard comments in assembly strings.
string CommentDelimiter = "#";
// Recognize hard-coded registers.
string RegisterPrefix = "%";
}
def IntelAsmParserVariant : AsmParserVariant {
int Variant = 1;
// Variant name.
string Name = "intel";
// Discard comments in assembly strings.
string CommentDelimiter = ";";
// Recognize hard-coded registers.
string RegisterPrefix = "";
}
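// For reference (not part of the original file), the two variants accept the
// same instruction in different notations, e.g.
//   AT&T:  movl $1, %eax        # '%' register prefix, '#' comments
//   Intel: mov eax, 1           ; no register prefix, ';' comments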
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
// The X86 target supports two different syntaxes for emitting machine code.
// This is controlled by the -x86-asm-syntax={att|intel}
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
let AllowRegisterRenaming = 1;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
// def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", [], IIC_FNCLEX>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;

View File

@@ -0,0 +1,103 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;
// Capstone: comment out below lines for X86 Reduce mode
/*
// X87 Floating Point Stack.
include "X86InstrFPStack.td"
// SIMD support (SSE, MMX and AVX)
include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
// XOP
include "X86InstrXOP.td"
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrAVX512.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
// MPX instructions
include "X86InstrMPX.td"
//include "X86InstrTSX.td"
include "X86InstrSGX.td"
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
def : InstAlias<"fxch", (XCH_F ST1), 0>;
def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
(Inst RST:$op), EmitAlias>;
def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
(Inst ST0), EmitAlias>;
}
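// Expansion sketch (illustrative, not from the original file): instantiating
// FpUnaryAlias<"fadd", ADD_FST0r> produces aliases so that, e.g.,
//   fadd %st(4), %st(0)
// is accepted and matched as (ADD_FST0r ST4).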
defm : FpUnaryAlias<"fadd", ADD_FST0r>;
defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
defm : FpUnaryAlias<"fsub", SUB_FST0r>;
defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
defm : FpUnaryAlias<"fmul", MUL_FST0r>;
defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
defm : FpUnaryAlias<"fcompi", COM_FIPr>;
defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
// which supports this due to an old AMD documentation bug when 64-bit mode was
// created.
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
(MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
(MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
*/

View File

@@ -0,0 +1,101 @@
// Capstone definitions fix for X86 LLVM instructions.
let Defs = [EFLAGS] in
def INT1 : I<0xf1, RawFrm, (outs), (ins), "int1", []>;
def FSETPM : I<0xDB, MRM_E4, (outs), (ins), "fsetpm", []>;
// Capstone: comment out below lines for X86 Reduce mode
// X87 Floating Point Stack.
//include "X86InstrFPStack.td"
// SIMD support (SSE, MMX and AVX)
//include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
//include "X86InstrFMA.td"
// XOP
//include "X86InstrXOP.td"
// SSE, MMX and 3DNow! vector support.
//include "X86InstrSSE.td"
//include "X86InstrAVX512.td"
//include "X86InstrMMX.td"
//include "X86Instr3DNow.td"
// MPX instructions
//include "X86InstrMPX.td"
//include "X86InstrTSX.td"
//include "X86InstrSGX.td"
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
//def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
//def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
//def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
//def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
//def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
//def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
//def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
//def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
//def : InstAlias<"fxch", (XCH_F ST1), 0>;
//def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
//def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
//def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
//def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
//def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
//def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
//def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
//def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
def : InstAlias<!strconcat(Mnemonic, "\t{$op, %st(0)|st(0), $op}"),
(Inst RST:$op), EmitAlias>;
def : InstAlias<!strconcat(Mnemonic, "\t{%st(0), %st(0)|st(0), st(0)}"),
(Inst ST0), EmitAlias>;
}
//defm : FpUnaryAlias<"fadd", ADD_FST0r>;
//defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
//defm : FpUnaryAlias<"fsub", SUB_FST0r>;
//defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0>;
//defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
//defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0>;
//defm : FpUnaryAlias<"fmul", MUL_FST0r>;
//defm : FpUnaryAlias<"fmulp", MUL_FPrST0>;
//defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
//defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0>;
//defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
//defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0>;
//defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
//defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
//defm : FpUnaryAlias<"fcompi", COM_FIPr>;
//defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
//def : InstAlias<"faddp\t{%st(0), $op|$op, st(0)}", (ADD_FPrST0 RST:$op), 0>;
//def : InstAlias<"fmulp\t{%st(0), $op|$op, st(0)}", (MUL_FPrST0 RST:$op)>;
//def : InstAlias<"fsub{|r}p\t{%st(0), $op|$op, st(0)}", (SUBR_FPrST0 RST:$op)>;
//def : InstAlias<"fsub{r|}p\t{%st(0), $op|$op, st(0)}", (SUB_FPrST0 RST:$op)>;
//def : InstAlias<"fdiv{|r}p\t{%st(0), $op|$op, st(0)}", (DIVR_FPrST0 RST:$op)>;
//def : InstAlias<"fdiv{r|}p\t{%st(0), $op|$op, st(0)}", (DIV_FPrST0 RST:$op)>;
//
//def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
// which supports this due to an old AMD documentation bug when 64-bit mode was
// created.
//def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
// (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
//def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
// (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;

View File

@@ -0,0 +1,111 @@
//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the 3DNow! instruction set, which extends MMX to support
// floating point and also adds a few more random instructions for good measure.
//
//===----------------------------------------------------------------------===//
class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
: I<o, F, outs, ins, asm, pat>, Requires<[Has3DNow]> {
}
class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>, ThreeDNow {
let Constraints = "$src1 = $dst";
}
class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
: I3DNow<o, F, (outs VR64:$dst), ins,
!strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>, ThreeDNow;
multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, bit Commutable = 0,
string Ver = ""> {
let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn,
X86FoldableSchedWrite sched, string Ver = ""> {
def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>,
Sched<[sched]>;
def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn))
(bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>;
defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>;
defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>;
defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>;
defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>;
defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>;
defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>;
let SchedRW = [WriteEMMS] in
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)]>, TB;
// When PREFETCHWT1 is supported, we want to use it for everything but T0.
def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
}]>;
// Use PREFETCHWT1 for NTA, T2, T1.
def PrefetchWT1Level : ImmLeaf<i32, [{
return Imm < 3;
}]>;
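// For reference (this mapping comes from the generic prefetch intrinsic and is
// an assumption here, not stated in this file): the locality immediate is
// 0 = NTA, 1 = T2, 2 = T1, 3 = T0. PrefetchWLevel therefore selects PREFETCHW
// for T0, or for every level when PREFETCHWT1 is unavailable, while
// PrefetchWT1Level (Imm < 3) routes NTA/T2/T1 to PREFETCHWT1.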
let SchedRW = [WriteLoad] in {
let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
[(prefetch addr:$addr, imm, imm, (i32 1))]>, TB;
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>,
TB, Requires<[HasPrefetchW]>;
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
[(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
TB, Requires<[HasPREFETCHWT1]>;
}
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">;
defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">;
defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,116 @@
//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
// instructions.
//
//===----------------------------------------------------------------------===//
// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, X86FoldableSchedWrite Sched,
PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [Sched] in {
def NAME#16rr
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,
TB, OpSize16;
def NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
(X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>,
TB, OpSize32;
def NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
(X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
SchedRW = [Sched.Folded, ReadAfterLd] in {
def NAME#16rm
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize16;
def NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
CondNode, EFLAGS))]>, TB, OpSize32;
def NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
CondNode, EFLAGS))]>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass
// Conditional Moves.
defm CMOVO : CMOV<0x40, "cmovo" , WriteCMOV, X86_COND_O>;
defm CMOVNO : CMOV<0x41, "cmovno", WriteCMOV, X86_COND_NO>;
defm CMOVB : CMOV<0x42, "cmovb" , WriteCMOV, X86_COND_B>;
defm CMOVAE : CMOV<0x43, "cmovae", WriteCMOV, X86_COND_AE>;
defm CMOVE : CMOV<0x44, "cmove" , WriteCMOV, X86_COND_E>;
defm CMOVNE : CMOV<0x45, "cmovne", WriteCMOV, X86_COND_NE>;
defm CMOVBE : CMOV<0x46, "cmovbe", WriteCMOV2, X86_COND_BE>;
defm CMOVA : CMOV<0x47, "cmova" , WriteCMOV2, X86_COND_A>;
defm CMOVS : CMOV<0x48, "cmovs" , WriteCMOV, X86_COND_S>;
defm CMOVNS : CMOV<0x49, "cmovns", WriteCMOV, X86_COND_NS>;
defm CMOVP : CMOV<0x4A, "cmovp" , WriteCMOV, X86_COND_P>;
defm CMOVNP : CMOV<0x4B, "cmovnp", WriteCMOV, X86_COND_NP>;
defm CMOVL : CMOV<0x4C, "cmovl" , WriteCMOV, X86_COND_L>;
defm CMOVGE : CMOV<0x4D, "cmovge", WriteCMOV, X86_COND_GE>;
defm CMOVLE : CMOV<0x4E, "cmovle", WriteCMOV, X86_COND_LE>;
defm CMOVG : CMOV<0x4F, "cmovg" , WriteCMOV, X86_COND_G>;
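// Usage sketch (illustrative, not from the original file): a branchless signed
// max of EDI and ESI built from the definitions above:
//   cmpl   %esi, %edi
//   cmovll %esi, %edi        // EDI = (EDI < ESI) ? ESI : EDI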
// SetCC instructions.
multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
let Uses = [EFLAGS] in {
def r : I<opc, MRMXr, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
[(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>,
TB, Sched<[WriteSETCC]>;
def m : I<opc, MRMXm, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
[(store (X86setcc OpNode, EFLAGS), addr:$dst)]>,
TB, Sched<[WriteSETCCStore]>;
} // Uses = [EFLAGS]
}
defm SETO : SETCC<0x90, "seto", X86_COND_O>; // is overflow bit set
defm SETNO : SETCC<0x91, "setno", X86_COND_NO>; // is overflow bit not set
defm SETB : SETCC<0x92, "setb", X86_COND_B>; // unsigned less than
defm SETAE : SETCC<0x93, "setae", X86_COND_AE>; // unsigned greater or equal
defm SETE : SETCC<0x94, "sete", X86_COND_E>; // equal to
defm SETNE : SETCC<0x95, "setne", X86_COND_NE>; // not equal to
defm SETBE : SETCC<0x96, "setbe", X86_COND_BE>; // unsigned less than or equal
defm SETA : SETCC<0x97, "seta", X86_COND_A>; // unsigned greater than
defm SETS : SETCC<0x98, "sets", X86_COND_S>; // is signed bit set
defm SETNS : SETCC<0x99, "setns", X86_COND_NS>; // is not signed
defm SETP : SETCC<0x9A, "setp", X86_COND_P>; // is parity bit set
defm SETNP : SETCC<0x9B, "setnp", X86_COND_NP>; // is parity bit not set
defm SETL : SETCC<0x9C, "setl", X86_COND_L>; // signed less than
defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
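// Usage sketch (illustrative, not from the original file): materializing the
// boolean (EDI == ESI) with the definitions above:
//   cmpl   %esi, %edi
//   sete   %al               // AL = 1 if equal, else 0
//   movzbl %al, %eax         // zero-extend to a full register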
// SALC is an undocumented instruction. Information about it can be found here:
// http://www.rcollins.org/secrets/opcodes/SALC.html
// Set AL if carry.
let Uses = [EFLAGS], Defs = [AL], SchedRW = [WriteALU] in {
def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,413 @@
//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 jump, return, call, and related instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
// Return instructions.
//
// The X86retflag return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
def RETW : I <0xC3, RawFrm, (outs), (ins),
"ret{w}", []>, OpSize16;
def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt", []>, OpSize16;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{l|f}", []>, OpSize32;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
"{l}ret{|f}q", []>, Requires<[In64BitMode]>;
def LRETW : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{w|f}", []>, OpSize16;
def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{l|f}\t$amt", []>, OpSize32;
def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{w|f}\t$amt", []>, OpSize16;
// The machine return from interrupt instruction, but sometimes we need to
// perform a post-epilogue stack adjustment. Codegen emits the pseudo form
// which expands to include an SP adjustment if necessary.
def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", []>,
OpSize16;
def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>, OpSize32;
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>;
// let isCodeGenOnly = 1 in
// def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
// def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
}
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", [(br bb:$dst)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def JMP_2 : Ii16PCRel<0xE9, RawFrm, (outs), (ins brtarget16:$dst),
"jmp\t$dst", []>, OpSize16;
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget32:$dst),
"jmp\t$dst", []>, OpSize32;
}
}
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm,
[(X86brcond bb:$dst, Cond, EFLAGS)]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
def _2 : Ii16PCRel<opc4, RawFrm, (outs), (ins brtarget16:$dst), asm,
[]>, OpSize16, TB;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget32:$dst), asm,
[]>, TB, OpSize32;
}
}
}
defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>;
defm JNO : ICBr<0x71, 0x81, "jno\t$dst", X86_COND_NO>;
defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>;
defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>;
defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>;
defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>;
defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>;
defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>;
defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>;
defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>;
defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>;
defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>;
defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>;
defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>;
defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jcxz\t$dst", []>, AdSize16, Requires<[Not64BitMode]>;
let Uses = [ECX] in
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jecxz\t$dst", []>, AdSize32;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jrcxz\t$dst", []>, AdSize64, Requires<[In64BitMode]>;
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP16r : I<0xFF, MRM4r, (outs), (ins GR16:$dst), "jmp{w}\t{*}$dst",
[(brind GR16:$dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>;
def JMP16m : I<0xFF, MRM4m, (outs), (ins i16mem:$dst), "jmp{w}\t{*}$dst",
[(brind (loadi16 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJumpLd]>;
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
[(brind GR32:$dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
[(brind (loadi32 addr:$dst))]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJumpLd]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
[(brind GR64:$dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
[(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>,
Sched<[WriteJumpLd]>;
// Non-tracking jumps for IBT; use with caution.
let isCodeGenOnly = 1 in {
def JMP16r_NT : I<0xFF, MRM4r, (outs), (ins GR16 : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind GR16 : $dst)]>, Requires<[Not64BitMode]>,
OpSize16, Sched<[WriteJump]>, NOTRACK;
def JMP16m_NT : I<0xFF, MRM4m, (outs), (ins i16mem : $dst), "jmp{w}\t{*}$dst",
[(X86NoTrackBrind (loadi16 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize16, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP32r_NT : I<0xFF, MRM4r, (outs), (ins GR32 : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind GR32 : $dst)]>, Requires<[Not64BitMode]>,
OpSize32, Sched<[WriteJump]>, NOTRACK;
def JMP32m_NT : I<0xFF, MRM4m, (outs), (ins i32mem : $dst), "jmp{l}\t{*}$dst",
[(X86NoTrackBrind (loadi32 addr : $dst))]>,
Requires<[Not64BitMode]>, OpSize32, Sched<[WriteJumpLd]>,
NOTRACK;
def JMP64r_NT : I<0xFF, MRM4r, (outs), (ins GR64 : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind GR64 : $dst)]>, Requires<[In64BitMode]>,
Sched<[WriteJump]>, NOTRACK;
def JMP64m_NT : I<0xFF, MRM4m, (outs), (ins i64mem : $dst), "jmp{q}\t{*}$dst",
[(X86NoTrackBrind(loadi64 addr : $dst))]>,
Requires<[In64BitMode]>, Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"ljmp{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"ljmp{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{q}\t{*}$dst", []>, Sched<[WriteJump]>, Requires<[In64BitMode]>;
let AsmVariantName = "att" in
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaquemem:$dst),
"{l}jmp{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
// Loop instructions
let SchedRW = [WriteJump] in {
def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
}
//===----------------------------------------------------------------------===//
// Call Instructions...
//
let isCall = 1 in
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let Uses = [ESP, SSP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst),
"call{l}\t$dst", []>, OpSize32,
Requires<[Not64BitMode]>, Sched<[WriteJump]>;
let hasSideEffects = 0 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
(outs), (ins i16imm_pcrel:$dst),
"call{w}\t$dst", []>, OpSize16,
Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
"call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
def CALL16m : I<0xFF, MRM2m, (outs), (ins i16mem:$dst),
"call{w}\t{*}$dst", [(X86call (loadi16 addr:$dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
Requires<[Not64BitMode,NotUseRetpoline]>, Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,NotUseRetpoline]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT; use with caution.
let isCodeGenOnly = 1 in {
def CALL16r_NT : I<0xFF, MRM2r, (outs), (ins GR16 : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall GR16 : $dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL16m_NT : I<0xFF, MRM2m, (outs), (ins i16mem : $dst),
"call{w}\t{*}$dst",[(X86NoTrackCall(loadi16 addr : $dst))]>,
OpSize16, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
def CALL32r_NT : I<0xFF, MRM2r, (outs), (ins GR32 : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall GR32 : $dst)]>,
OpSize32, Requires<[Not64BitMode]>, Sched<[WriteJump]>, NOTRACK;
def CALL32m_NT : I<0xFF, MRM2m, (outs), (ins i32mem : $dst),
"call{l}\t{*}$dst",[(X86NoTrackCall(loadi32 addr : $dst))]>,
OpSize32, Requires<[Not64BitMode,FavorMemIndirectCall]>,
Sched<[WriteJumpLd]>, NOTRACK;
}
let Predicates = [Not64BitMode], AsmVariantName = "att" in {
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
"lcall{w}\t$seg : $off", []>,
OpSize16, Sched<[WriteJump]>;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
"lcall{l}\t$seg : $off", []>,
OpSize32, Sched<[WriteJump]>;
}
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize16, Sched<[WriteJumpLd]>;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"{l}call{l}\t{*}$dst", []>, OpSize32, Sched<[WriteJumpLd]>;
}
/*
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, SSP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TCRETURNri : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs),
(ins i32mem_TC:$dst, i32imm:$offset), []>;
// FIXME: These should be pseudo instructions that are lowered when going to
// MCInst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst), "jmp\t$dst", []>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst),
"jmp{l}\t{*}$dst", []>;
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP, EFLAGS, SSP] in {
def TCRETURNdicc : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/
//===----------------------------------------------------------------------===//
// Call Instructions...
//
// RSP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead. Uses for argument
// registers are added manually.
let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst),
"call{q}\t$dst", []>, OpSize32,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseRetpoline]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseRetpoline]>;
// Non-tracking calls for IBT; use with caution.
let isCodeGenOnly = 1 in {
def CALL64r_NT : I<0xFF, MRM2r, (outs), (ins GR64 : $dst),
"call{q}\t{*}$dst",[(X86NoTrackCall GR64 : $dst)]>,
Requires<[In64BitMode]>, NOTRACK;
def CALL64m_NT : I<0xFF, MRM2m, (outs), (ins i64mem : $dst),
"call{q}\t{*}$dst",
[(X86NoTrackCall(loadi64 addr : $dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall]>, NOTRACK;
}
def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaquemem:$dst),
"lcall{q}\t{*}$dst", []>;
}
/*
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset),
[]>;
def TCRETURNri64 : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
"jmp\t$dst", []>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"jmp{q}\t{*}$dst", []>;
// Win64 wants indirect jumps leaving the function to have a REX_W prefix.
let hasREX_WPrefix = 1 in {
def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
let mayLoad = 1 in
def TAILJMPm64_REX : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"rex64 jmp{q}\t{*}$dst", []>;
}
}
let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
Requires<[Not64BitMode,UseRetpoline]>;
def RETPOLINE_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
Requires<[In64BitMode,UseRetpoline]>;
// Retpoline variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def RETPOLINE_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
def RETPOLINE_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}
// Conditional tail calls are similar to the above, but they are branches
// rather than barriers, and they use EFLAGS.
let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [RSP, EFLAGS, SSP] in {
def TCRETURNdi64cc : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset,
i32imm:$cond), []>;
// This gets substituted to a conditional jump instruction in MC lowering.
def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, i32imm:$cond), "", []>;
}
*/

View File

@@ -0,0 +1,204 @@
//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
//
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
let Defs = [AX], Uses = [AL] in // AX = signext(AL)
def CBW : I<0x98, RawFrm, (outs), (ins),
"{cbtw|cbw}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX], Uses = [AX] in // EAX = signext(AX)
def CWDE : I<0x98, RawFrm, (outs), (ins),
"{cwtl|cwde}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [AX,DX], Uses = [AX] in // DX:AX = signext(AX)
def CWD : I<0x99, RawFrm, (outs), (ins),
"{cwtd|cwd}", []>, OpSize16, Sched<[WriteALU]>;
let Defs = [EAX,EDX], Uses = [EAX] in // EDX:EAX = signext(EAX)
def CDQ : I<0x99, RawFrm, (outs), (ins),
"{cltd|cdq}", []>, OpSize32, Sched<[WriteALU]>;
let Defs = [RAX], Uses = [EAX] in // RAX = signext(EAX)
def CDQE : RI<0x98, RawFrm, (outs), (ins),
"{cltq|cdqe}", []>, Sched<[WriteALU]>;
let Defs = [RAX,RDX], Uses = [RAX] in // RDX:RAX = signext(RAX)
def CQO : RI<0x99, RawFrm, (outs), (ins),
"{cqto|cqo}", []>, Sched<[WriteALU]>;
}
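// Worked example (illustrative, not from the original file): if AL = 0x80
// (-128), CBW sign-extends it so that AX = 0xFF80; CWD then replicates AX's
// sign bit so that DX:AX = 0xFFFF:0xFF80 -- the dividend form IDIV expects.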
// Sign/Zero extenders
let hasSideEffects = 0 in {
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movs{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>,
OpSize32, TB, Sched<[WriteALULd]>;
let hasSideEffects = 0 in {
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
"movz{bw|x}\t{$src, $dst|$dst, $src}", []>,
TB, OpSize16, Sched<[WriteALULd]>;
} // hasSideEffects = 0
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB,
OpSize32, Sched<[WriteALULd]>;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR16:$src))]>, TB,
OpSize32, Sched<[WriteALU]>;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zextloadi32i16 addr:$src))]>,
TB, OpSize32, Sched<[WriteALULd]>;
// These instructions exist as a consequence of the operand size prefix having
// control of the destination size, but not the input size. Only support them
// for the disassembler.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
let mayLoad = 1 in {
def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
[]>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable;
def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable;
} // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.
let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32rr8_NOREX : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[]>, TB, OpSize32, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
// operand that can never access an h register. If support for h registers
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR8:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i8 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR16:$src))]>, TB,
Sched<[WriteALU]>;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i16 addr:$src))]>,
TB, Sched<[WriteALULd]>;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sext GR32:$src))]>,
Sched<[WriteALU]>, Requires<[In64BitMode]>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (sextloadi64i32 addr:$src))]>,
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
let hasSideEffects = 0 in {
def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
"movz{bq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movz{wq|x}\t{$src, $dst|$dst, $src}", []>,
TB, Sched<[WriteALULd]>;
}
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
def : Pat<(i64 (zext GR8:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(zextloadi64i8 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(i64 (zext GR16:$src)),
(SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def : Pat<(zextloadi64i16 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
// The preferred way to do 32-bit-to-64-bit zero extension on x86-64 is to use a
// SUBREG_TO_REG to utilize implicit zero-extension; however, this isn't possible
// when the 32-bit value is defined by a truncate or is copied from something
// where the high bits aren't necessarily all zero. In such cases, we fall back
// to these explicit zext instructions.
def : Pat<(i64 (zext GR32:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;
def : Pat<(i64 (zextloadi64i32 addr:$src)),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
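// For reference (an architectural fact, not stated here): in 64-bit mode any
// instruction that writes a 32-bit register implicitly zeroes bits 63:32, so
//   movl %esi, %eax
// already yields the zero-extended 64-bit value; SUBREG_TO_REG merely records
// that the upper half is known to be zero.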

View File

@@ -0,0 +1,636 @@
//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes FMA (Fused Multiply-Add) instructions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
// For all FMA opcodes declared in fma3p_rm_* and fma3s_rm_* multiclasses
// defined below, both the register and memory variants are commutable.
// For the register form the commutable operands are 1, 2 and 3.
// For the memory variant the folded operand must be in 3. Thus,
// in that case, only the operands 1 and 2 can be swapped.
// Commuting some of the operands may require an opcode change.
// FMA*213*:
// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
// operands 1 and 3 (register forms only): *213* --> *231*;
// operands 2 and 3 (register forms only): *213* --> *132*.
// FMA*132*:
// operands 1 and 2 (memory & register forms): *132* --> *231*;
// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
// operands 2 and 3 (register forms only): *132* --> *213*.
// FMA*231*:
// operands 1 and 2 (memory & register forms): *231* --> *132*;
// operands 1 and 3 (register forms only): *231* --> *213*;
// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
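// Worked example (illustrative, not from the original file): *213* computes
// dst = src2 * src1 + src3. After swapping operands 1 and 3, the same value is
// src2 * src3 + src1 in the new numbering, which is exactly the *231* form --
// hence the 213 --> 231 opcode change noted above.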
multiclass fma3p_rm_213<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1, RC:$src3)))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, RC:$src1,
(MemFrag addr:$src3))))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_231<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3),
RC:$src1)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3p_rm_132<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType VT, X86MemOperand x86memop, PatFrag MemFrag,
SDNode Op, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// The pattern is in 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1,
RC:$src2)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256,
X86SchedWriteWidths sched> {
defm NAME#213#Suff : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#231#Suff : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#132#Suff : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR128, OpTy128, f128mem, MemFrag128, Op, sched.XMM>;
defm NAME#213#Suff#Y : fma3p_rm_213<opc213, !strconcat(OpcodeStr, "213", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#231#Suff#Y : fma3p_rm_231<opc231, !strconcat(OpcodeStr, "231", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
defm NAME#132#Suff#Y : fma3p_rm_132<opc132, !strconcat(OpcodeStr, "132", PackTy),
VR256, OpTy256, f256mem, MemFrag256, Op, sched.YMM>,
VEX_L;
}
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmaddsub, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsubadd, v4f32, v8f32,
SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsub, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmaddsub,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsubadd,
v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmadd, v4f32, v8f32, SchedWriteFMA>;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
loadv8f32, X86Fnmsub, v4f32, v8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
loadv4f64, X86Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
}
// All source register operands of FMA opcodes defined in the fma3s_rm
// multiclasses can be commuted. In many cases such a commute transformation
// requires an opcode adjustment; for example, commuting operands 1 and 2 in
// the FMA*132 form would require an opcode change to FMA*231:
// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
// -->
// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
// Please see more detailed comment at the very beginning of the section
// defining FMA3 opcodes above.
multiclass fma3s_rm_213<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode,
X86FoldableSchedWrite sched> {
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>,
Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_231<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src2, (load addr:$src3), RC:$src1))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
multiclass fma3s_rm_132<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode, X86FoldableSchedWrite sched> {
let hasSideEffects = 0 in
def r : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
// Pattern is in 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
let mayLoad = 1 in
def m : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
(OpNode (load addr:$src3), RC:$src1, RC:$src2))]>,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
X86MemOperand x86memop, X86FoldableSchedWrite sched> {
defm NAME#213#Suff : fma3s_rm_213<opc213, !strconcat(OpStr, "213", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#231#Suff : fma3s_rm_231<opc231, !strconcat(OpStr, "231", PackTy),
x86memop, RC, OpNode, sched>;
defm NAME#132#Suff : fma3s_rm_132<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpNode, sched>;
}
// These FMA*_Int instructions are defined specially for use when
// the scalar FMA intrinsics are lowered to machine instructions, and in that
// sense, they are similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc.
// instructions.
//
// All of the FMA*_Int opcodes are defined as commutable here.
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
// and the corresponding optimizations have been developed.
// Commuting the 1st operand of FMA*_Int requires some additional analysis;
// the commute optimization is legal only if all users of FMA*_Int use only
// the lowest element of the FMA*_Int instruction. Even though such analysis
// may not be implemented yet, we allow the routines doing the actual commute
// transformation to decide if one or another instruction is commutable or not.
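// A hedged illustration of why commuting operand 1 needs analysis: the scalar
// VFMADD213SSr_Int form computes dst[31:0] = src2[31:0] * src1[31:0] +
// src3[31:0] and passes dst[127:32] through from operand 1. Moving a
// different register into operand 1 changes which upper elements survive, so
// the commute is only safe when every user reads just the lowest element.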
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
hasSideEffects = 0 in
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
Operand memopr, RegisterClass RC,
X86FoldableSchedWrite sched> {
def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched]>;
let mayLoad = 1 in
def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, memopr:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[]>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
// to machine instructions.
// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
// operands of the FMA 213 form.
// The FMA 231 form can be obtained only by commuting the 1st operand of the
// 213 or 132 forms, which is possible only after special analysis of all uses
// of the initial instruction. Such analysis does not exist yet, and thus the
// 231 form of FMA*_Int instructions is introduced using an optimistic
// assumption that such analysis will be implemented eventually.
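// A hedged worked example of the trivial 2nd/3rd commute: the 213 form
// computes dst = src2 * src1 + src3; after exchanging the registers in
// operands 2 and 3, the same value is src1 * src3 + src2 under the new
// numbering, which is exactly the 132 form, so only the opcode changes.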
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
RegisterClass RC, Operand memop,
X86FoldableSchedWrite sched> {
defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
memop, RC, sched>;
defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
memop, RC, sched>;
defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
memop, RC, sched>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedSingle in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
FR32, f32mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS",
VR128, ssmem, sched>;
let ExeDomain = SSEPackedDouble in
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
FR64, f64mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
VR128, sdmem, sched>, VEX_W;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsub,
SchedWriteFMA.Scl>, VEX_LIG;
multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
SDNode Move, ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA, NoAVX512] in {
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
RC:$src3))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, RC:$src3,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"r_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Prefix#"213"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op (EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
(mem_frag addr:$src3), RC:$src2))))),
(!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
(Op RC:$src2, (mem_frag addr:$src3),
(EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
(!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
VR128:$src1, (VT (COPY_TO_REGCLASS RC:$src2, VR128)),
addr:$src3)>;
}
}
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
//===----------------------------------------------------------------------===//
// FMA4 - AMD 4 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
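// A hedged note on the forms below: unlike FMA3, FMA4 has a destination
// distinct from its sources, so a memory operand can be folded into either
// multiplicand position -- the "rm" defs fold $src3 (MRMSrcMemOp4, with
// VEX_W) while the "mr" defs fold $src2 (MRMSrcMem, without VEX_W) -- giving
// two memory forms per opcode.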
multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag, X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : FMA4S<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched]>;
def rm : FMA4S<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : FMA4S<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_LIG, FoldGenData<NAME#rr>, Sched<[sched]>;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ValueType VT, X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
ReadAfterLd]>;
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, VEX_LIG, FoldGenData<NAME#rr_Int>, Sched<[sched]>;
} // isCodeGenOnly = 1
}
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256,
X86SchedWriteWidths sched> {
let isCommutable = 1 in
def rr : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
VEX_W, Sched<[sched.XMM]>;
def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
(ld_frag128 addr:$src3)))]>, VEX_W,
Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>,
Sched<[sched.XMM.Folded, ReadAfterLd,
// f128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128::$src3
ReadAfterLd]>;
let isCommutable = 1 in
def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
VEX_W, VEX_L, Sched<[sched.YMM]>;
def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
(ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1,
(ld_frag256 addr:$src2), VR256:$src3))]>, VEX_L,
Sched<[sched.YMM.Folded, ReadAfterLd,
// f256mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR256::$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
Sched<[sched.XMM]>, FoldGenData<NAME#rr>;
def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
VEX_L, Sched<[sched.YMM]>, FoldGenData<NAME#Yrr>;
} // isCodeGenOnly = 1
}
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
}
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
}
multiclass scalar_fma4_patterns<SDNode Op, string Name,
ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
(!cast<Instruction>(Name#"rr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)),
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2,
(mem_frag addr:$src3)))))),
(!cast<Instruction>(Name#"rm_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)),
(VT (COPY_TO_REGCLASS RC:$src2, VR128)), addr:$src3)>;
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, (mem_frag addr:$src2),
RC:$src3))))),
(!cast<Instruction>(Name#"mr_Int")
(VT (COPY_TO_REGCLASS RC:$src1, VR128)), addr:$src2,
(VT (COPY_TO_REGCLASS RC:$src3, VR128)))>;
}
}
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;


@@ -0,0 +1,748 @@
//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDTX86FpToIMem : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fst : SDNode<"X86ISD::FST", SDTX86Fst,
[SDNPHasChain, SDNPInGlue, SDNPMayStore,
SDNPMemOperand]>;
def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad,
SDNPMemOperand]>;
def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>;
def X86fp_to_i16mem : SDNode<"X86ISD::FP_TO_INT16_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i32mem : SDNode<"X86ISD::FP_TO_INT32_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_to_i64mem : SDNode<"X86ISD::FP_TO_INT64_IN_MEM", SDTX86FpToIMem,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
[SDNPHasChain, SDNPMayStore, SDNPSideEffect,
SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// FPStack pattern fragments
//===----------------------------------------------------------------------===//
def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
def fpimmneg0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-0.0);
}]>;
def fpimm1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+1.0);
}]>;
def fpimmneg1 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(-1.0);
}]>;
/*
// Some 'special' instructions - expanded after instruction selection.
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def FP32_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP32:$src),
[(X86fp_to_i16mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP32:$src),
[(X86fp_to_i32mem RFP32:$src, addr:$dst)]>;
def FP32_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP32:$src),
[(X86fp_to_i64mem RFP32:$src, addr:$dst)]>;
def FP64_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP64:$src),
[(X86fp_to_i16mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP64:$src),
[(X86fp_to_i32mem RFP64:$src, addr:$dst)]>;
def FP64_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP64:$src),
[(X86fp_to_i64mem RFP64:$src, addr:$dst)]>;
def FP80_TO_INT16_IN_MEM : PseudoI<(outs), (ins i16mem:$dst, RFP80:$src),
[(X86fp_to_i16mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT32_IN_MEM : PseudoI<(outs), (ins i32mem:$dst, RFP80:$src),
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
*/
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32",
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
// In some cases there are additional variants with a mixture of different
// register sizes.
// The fourth instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
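// A concrete pairing from this file, as an illustration: "defm ADD :
// FPBinary_rr<fadd>" below yields the ADD_Fp32/ADD_Fp64/ADD_Fp80 pseudos used
// by instruction selection, while ADD_FST0r / ADD_FrST0 / ADD_FPrST0 are the
// matching real FPI "fadd"/"faddp" register forms on RST registers that the
// FP stackifier substitutes in after register allocation.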
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32]>;
class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
// Register op register -> register
// These are separated out because they have no reversed form.
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), TwoArgFP,
[(set RFP32:$dst, (OpNode RFP32:$src1, RFP32:$src2))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2), TwoArgFP,
[(set RFP64:$dst, (OpNode RFP64:$src1, RFP64:$src2))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
[(set RFP80:$dst, (OpNode RFP80:$src1, RFP80:$src2))]>;
}
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2))),
(set RFP32:$dst,
(OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2))),
(set RFP64:$dst,
(OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
(set RFP64:$dst,
(OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP32:$dst,
(OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
(set RFP32:$dst,
(OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP64:$dst,
(OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
(set RFP64:$dst,
(OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[!if(Forward,
(set RFP80:$dst,
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
} // mayLoad = 1, hasSideEffects = 1
}
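// A hedged illustration of the Forward bit: "defm SUBR: FPBinary<fsub, MRM5m,
// "subr", 0>" below produces, e.g., SUBR_Fp32m whose pattern is
// (set RFP32:$dst, (fsub (loadf32 addr:$src2), RFP32:$src1)) -- the loaded
// operand becomes the minuend, matching the reversed "fsubr" semantics.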
let Defs = [FPSW] in {
// FPBinary_rr just defines pseudo-instructions, so there is no need to set
// scheduling resources.
let hasNoSchedulingInfo = 1 in {
defm ADD : FPBinary_rr<fadd>;
defm SUB : FPBinary_rr<fsub>;
defm MUL : FPBinary_rr<fmul>;
defm DIV : FPBinary_rr<fdiv>;
}
// Sets the scheduling resources for the actual NAME#_F<size>m definitions.
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<fadd, MRM0m, "add">;
defm SUB : FPBinary<fsub, MRM4m, "sub">;
defm SUBR: FPBinary<fsub, MRM5m, "subr", 0>;
}
let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
defm DIV : FPBinary<fdiv, MRM6m, "div">;
defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
}
} // Defs = [FPSW]
class FPST0rInst<Format fp, string asm>
: FPI<0xD8, fp, (outs), (ins RST:$op), asm>;
class FPrST0Inst<Format fp, string asm>
: FPI<0xDC, fp, (outs), (ins RST:$op), asm>;
class FPrST0PInst<Format fp, string asm>
: FPI<0xDE, fp, (outs), (ins RST:$op), asm>;
// NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
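// As a reading aid for the asm strings below (describing the existing
// convention, not new behavior): "{a|b}" selects the dialect, AT&T on the
// left and Intel on the right, and a braced group without '|' prints only in
// AT&T syntax. So "fsub{r}\t{%st(0), $op|$op, st(0)}" prints as
// "fsubr %st(0), %st(i)" for AT&T but "fsub st(i), st(0)" for Intel, which is
// how the mnemonic swap described above is implemented.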
let SchedRW = [WriteFAdd] in {
def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t$op">;
def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t$op">;
def SUBR_FST0r : FPST0rInst <MRM5r, "fsubr\t$op">;
def SUB_FrST0 : FPrST0Inst <MRM5r, "fsub{r}\t{%st(0), $op|$op, st(0)}">;
def SUB_FPrST0 : FPrST0PInst<MRM5r, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">;
def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
} // SchedRW
let SchedRW = [WriteFCom] in {
def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">;
def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">;
} // SchedRW
let SchedRW = [WriteFMul] in {
def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t$op">;
def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t$op">;
} // SchedRW
let SchedRW = [WriteFDiv] in {
def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t$op">;
def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
def DIV_FST0r : FPST0rInst <MRM6r, "fdiv\t$op">;
def DIVR_FrST0 : FPrST0Inst <MRM6r, "fdiv{|r}\t{%st(0), $op|$op, st(0)}">;
def DIVR_FPrST0 : FPrST0PInst<MRM6r, "fdiv{|r}p\t$op">;
} // SchedRW
// Unary operations.
multiclass FPUnary<SDNode OpNode, Format fp, string asmstring> {
def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src))]>;
def _Fp64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src))]>;
def _F : FPI<0xD9, fp, (outs), (ins), asmstring>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFSign] in {
defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
}
let SchedRW = [WriteFSqrt80] in
defm SQRT: FPUnary<fsqrt,MRM_FA, "fsqrt">;
let SchedRW = [WriteMicrocoded] in {
defm SIN : FPUnary<fsin, MRM_FE, "fsin">;
defm COS : FPUnary<fcos, MRM_FF, "fcos">;
}
let SchedRW = [WriteFCom] in {
let hasSideEffects = 0 in {
def TST_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def TST_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
} // hasSideEffects
def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Defs = [FPSW]
// Versions of FP instructions that take a single memory operand. Added for the
// disassembler; remove as they are included with patterns elsewhere.
let SchedRW = [WriteFComLd] in {
def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">;
def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">;
def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
} // SchedRW
let SchedRW = [WriteMicrocoded] in {
def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
def FNSTSWm : FPI<0xDD, MRM7m, (outs), (ins i16mem:$dst), "fnstsw\t$dst">;
def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\ttbyte ptr $src">;
def FBSTPm : FPI<0xDF, MRM6m, (outs), (ins f80mem:$dst), "fbstp\ttbyte ptr $dst">;
} // SchedRW
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
multiclass FPCMov<PatLeaf cc> {
def _Fp32 : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
CondMovFP,
[(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
cc, EFLAGS))]>;
def _Fp64 : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
CondMovFP,
[(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
cc, EFLAGS))]>;
def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
CondMovFP,
[(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
cc, EFLAGS))]>,
Requires<[HasCMov]>;
}
let Defs = [FPSW] in {
let SchedRW = [WriteFCMOV] in {
let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
defm CMOVP : FPCMov<X86_COND_P>;
defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xDA, MRM0r, (outs), (ins RST:$op),
"fcmovb\t{$op, %st(0)|st(0), $op}">;
def CMOVBE_F : FPI<0xDA, MRM2r, (outs), (ins RST:$op),
"fcmovbe\t{$op, %st(0)|st(0), $op}">;
def CMOVE_F : FPI<0xDA, MRM1r, (outs), (ins RST:$op),
"fcmove\t{$op, %st(0)|st(0), $op}">;
def CMOVP_F : FPI<0xDA, MRM3r, (outs), (ins RST:$op),
"fcmovu\t{$op, %st(0)|st(0), $op}">;
def CMOVNB_F : FPI<0xDB, MRM0r, (outs), (ins RST:$op),
"fcmovnb\t{$op, %st(0)|st(0), $op}">;
def CMOVNBE_F: FPI<0xDB, MRM2r, (outs), (ins RST:$op),
"fcmovnbe\t{$op, %st(0)|st(0), $op}">;
def CMOVNE_F : FPI<0xDB, MRM1r, (outs), (ins RST:$op),
"fcmovne\t{$op, %st(0)|st(0), $op}">;
def CMOVNP_F : FPI<0xDB, MRM3r, (outs), (ins RST:$op),
"fcmovnu\t{$op, %st(0)|st(0), $op}">;
} // Predicates = [HasCMov]
} // SchedRW
// Floating point loads & stores.
let SchedRW = [WriteLoad] in {
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
[(set RFP80:$dst, (loadf80 addr:$src))]>;
} // canFoldAsLoad
def LD_Fp32m64 : FpIf64<(outs RFP64:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (f64 (extloadf32 addr:$src)))]>;
def LD_Fp64m80 : FpI_<(outs RFP80:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf64 addr:$src)))]>;
def LD_Fp32m80 : FpI_<(outs RFP80:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (f80 (extloadf32 addr:$src)))]>;
def ILD_Fp16m32: FpIf32<(outs RFP32:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m32: FpIf32<(outs RFP32:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m32: FpIf32<(outs RFP32:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP32:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m64: FpIf64<(outs RFP64:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m64: FpIf64<(outs RFP64:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m64: FpIf64<(outs RFP64:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (X86fild addr:$src, i64))]>;
def ILD_Fp16m80: FpI_<(outs RFP80:$dst), (ins i16mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i16))]>;
def ILD_Fp32m80: FpI_<(outs RFP80:$dst), (ins i32mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i32))]>;
def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$src), ZeroArgFP,
[(set RFP80:$dst, (X86fild addr:$src, i64))]>;
} // SchedRW
let SchedRW = [WriteStore] in {
def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
[(store RFP32:$src, addr:$op)]>;
def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
[(truncstoref32 RFP64:$src, addr:$op)]>;
def ST_Fp64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP,
[(store RFP64:$src, addr:$op)]>;
def ST_Fp80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP,
[(truncstoref32 RFP80:$src, addr:$op)]>;
def ST_Fp80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP,
[(truncstoref64 RFP80:$src, addr:$op)]>;
// FST does not support 80-bit memory target; FSTP must be used.
let mayStore = 1, hasSideEffects = 0 in {
def ST_FpP32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, []>;
def ST_FpP64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP64m : FpIf64<(outs), (ins f64mem:$op, RFP64:$src), OneArgFP, []>;
def ST_FpP80m32 : FpI_<(outs), (ins f32mem:$op, RFP80:$src), OneArgFP, []>;
def ST_FpP80m64 : FpI_<(outs), (ins f64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
def ST_FpP80m : FpI_<(outs), (ins f80mem:$op, RFP80:$src), OneArgFP,
[(store RFP80:$src, addr:$op)]>;
let mayStore = 1, hasSideEffects = 0 in {
def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
} // mayStore
} // SchedRW
let mayLoad = 1, SchedRW = [WriteLoad] in {
def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src">;
def LD_F80m : FPI<0xDB, MRM5m, (outs), (ins f80mem:$src), "fld{t}\t$src">;
def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$src), "fild{s}\t$src">;
def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
}
let mayStore = 1, SchedRW = [WriteStore] in {
def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
def ST_FP64m : FPI<0xDD, MRM3m, (outs), (ins f64mem:$dst), "fstp{l}\t$dst">;
def ST_FP80m : FPI<0xDB, MRM7m, (outs), (ins f80mem:$dst), "fstp{t}\t$dst">;
def IST_F16m : FPI<0xDF, MRM2m, (outs), (ins i16mem:$dst), "fist{s}\t$dst">;
def IST_F32m : FPI<0xDB, MRM2m, (outs), (ins i32mem:$dst), "fist{l}\t$dst">;
def IST_FP16m : FPI<0xDF, MRM3m, (outs), (ins i16mem:$dst), "fistp{s}\t$dst">;
def IST_FP32m : FPI<0xDB, MRM3m, (outs), (ins i32mem:$dst), "fistp{l}\t$dst">;
def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's an FPStack op.
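// Hedged illustration: FISTTP always truncates toward zero regardless of the
// rounding control in the FPU control word, so a C-style cast no longer needs
// the classic save/modify/restore sequence, roughly:
//   fnstcw  (%rsp)          ; save control word
//   orw     $0xC00, (%rsp)  ; force RC = truncate
//   fldcw   (%rsp)
//   fistpll (%rax)
//   fldcw   <saved>         ; restore
// With SSE3, a single "fisttpll (%rax)" suffices.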
let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
[(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
[(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
[(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
} // Predicates = [HasSSE3]
let mayStore = 1, SchedRW = [WriteStore] in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
}
// FP Stack manipulation instructions.
let SchedRW = [WriteMove] in {
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RST:$op), "fld\t$op">;
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RST:$op), "fst\t$op">;
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RST:$op), "fstp\t$op">;
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RST:$op), "fxch\t$op">;
}
// Floating point constant loads.
let isReMaterializable = 1, SchedRW = [WriteZero] in {
def LD_Fp032 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm0)]>;
def LD_Fp132 : FpIf32<(outs RFP32:$dst), (ins), ZeroArgFP,
[(set RFP32:$dst, fpimm1)]>;
def LD_Fp064 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm0)]>;
def LD_Fp164 : FpIf64<(outs RFP64:$dst), (ins), ZeroArgFP,
[(set RFP64:$dst, fpimm1)]>;
def LD_Fp080 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm0)]>;
def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
[(set RFP80:$dst, fpimm1)]>;
}
let SchedRW = [WriteFLD0] in
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
let SchedRW = [WriteFLD1] in
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
let SchedRW = [WriteFLDC], Defs = [FPSW] in {
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
def FLDLG2 : I<0xD9, MRM_EC, (outs), (ins), "fldlg2", []>;
def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", []>;
} // SchedRW
// Floating point compares.
let SchedRW = [WriteFCom] in {
def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>;
def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>;
} // SchedRW
} // Defs = [FPSW]
let SchedRW = [WriteFCom] in {
// CC = ST(0) cmp ST(i)
let Defs = [EFLAGS, FPSW] in {
def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
[(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
}
let Defs = [FPSW], Uses = [ST0] in {
def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucom\t$reg">;
def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucomp\t$reg">;
def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) with ST(1), pop, pop
(outs), (ins), "fucompp">;
}
let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i)
(outs), (ins RST:$reg), "fucomi\t$reg">;
def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop
(outs), (ins RST:$reg), "fucompi\t$reg">;
}
let Defs = [EFLAGS, FPSW] in {
def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
}
} // SchedRW
// Floating point flag ops.
let SchedRW = [WriteALU] in {
let Defs = [AX], Uses = [FPSW] in
def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags
(outs), (ins), "fnstsw\t{%ax|ax}",
[(set AX, (X86fp_stsw FPSW))]>;
let Defs = [FPSW] in
def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control word
(outs), (ins i16mem:$dst), "fnstcw\t$dst",
[(X86fp_cwd_get16 addr:$dst)]>;
} // SchedRW
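// Hedged note: X86fp_stsw / FNSTSW16r mainly supports the classic pre-FCOMI
// compare idiom -- roughly "fucompp; fnstsw %ax; sahf" -- which copies the
// x87 condition bits into EFLAGS via AH.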
let Defs = [FPSW], mayLoad = 1 in
def FLDCW16m : I<0xD9, MRM5m, // X87 control word = [mem16]
(outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
Sched<[WriteLoad]>;
// FPU control instructions
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", []>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg), "ffree\t$reg">;
def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg), "ffreep\t$reg">;
def FPNCEST0r : FPI<0xD9, MRM3r, (outs RST:$op), (ins),
"fstpnce\t{%st(0), $op|$op, st(0)}">;
def FENI8087_NOP : I<0xDB, MRM_E0, (outs), (ins), "feni8087_nop", []>;
def FDISI8087_NOP : I<0xDB, MRM_E1, (outs), (ins), "fdisi8087_nop", []>;
// Clear exceptions
def FNCLEX : I<0xDB, MRM_E2, (outs), (ins), "fnclex", []>;
} // Defs = [FPSW]
} // SchedRW
// Operand-less floating-point instructions for the disassembler.
def FNOP : I<0xD9, MRM_D0, (outs), (ins), "fnop", []>, Sched<[WriteNop]>;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW] in {
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
def FXAM : I<0xD9, MRM_E5, (outs), (ins), "fxam", []>;
def F2XM1 : I<0xD9, MRM_F0, (outs), (ins), "f2xm1", []>;
def FYL2X : I<0xD9, MRM_F1, (outs), (ins), "fyl2x", []>;
def FPTAN : I<0xD9, MRM_F2, (outs), (ins), "fptan", []>;
def FPATAN : I<0xD9, MRM_F3, (outs), (ins), "fpatan", []>;
def FXTRACT : I<0xD9, MRM_F4, (outs), (ins), "fxtract", []>;
def FPREM1 : I<0xD9, MRM_F5, (outs), (ins), "fprem1", []>;
def FDECSTP : I<0xD9, MRM_F6, (outs), (ins), "fdecstp", []>;
def FINCSTP : I<0xD9, MRM_F7, (outs), (ins), "fincstp", []>;
def FPREM : I<0xD9, MRM_F8, (outs), (ins), "fprem", []>;
def FYL2XP1 : I<0xD9, MRM_F9, (outs), (ins), "fyl2xp1", []>;
def FSINCOS : I<0xD9, MRM_FB, (outs), (ins), "fsincos", []>;
def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", []>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", []>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", []>;
} // Defs = [FPSW]
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave\t$dst", [(int_x86_fxsave addr:$dst)]>, TB,
Requires<[HasFXSR]>;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaquemem:$dst),
"fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor\t$src", [(int_x86_fxrstor addr:$src)]>,
TB, Requires<[HasFXSR]>;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaquemem:$src),
"fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)]>,
TB, Requires<[HasFXSR, In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
// Required for RET of f32 / f64 / f80 values.
def : Pat<(X86fld addr:$src, f32), (LD_Fp32m addr:$src)>;
def : Pat<(X86fld addr:$src, f64), (LD_Fp64m addr:$src)>;
def : Pat<(X86fld addr:$src, f80), (LD_Fp80m addr:$src)>;
// Required for CALLs which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
RFP80:$src)>;
// Floating point constant -0.0 and -1.0
def : Pat<(f32 fpimmneg0), (CHS_Fp32 (LD_Fp032))>, Requires<[FPStackf32]>;
def : Pat<(f32 fpimmneg1), (CHS_Fp32 (LD_Fp132))>, Requires<[FPStackf32]>;
def : Pat<(f64 fpimmneg0), (CHS_Fp64 (LD_Fp064))>, Requires<[FPStackf64]>;
def : Pat<(f64 fpimmneg1), (CHS_Fp64 (LD_Fp164))>, Requires<[FPStackf64]>;
def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
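// Hedged illustration: materializing an f80 -1.0 therefore selects
// (CHS_Fp80 (LD_Fp180)), which the FP stackifier ultimately emits as the real
// instruction pair "fld1; fchs".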
// Used to convert i64 to f64 since there isn't an SSE version.
def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;


@@ -0,0 +1,993 @@
//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
//
// Format specifies the encoding used by the instruction. This is part of the
// ad-hoc solution used to emit machine instruction encodings by our machine
// code emitter.
class Format<bits<7> val> {
bits<7> Value = val;
}
def Pseudo : Format<0>;
def RawFrm : Format<1>;
def AddRegFrm : Format<2>;
def RawFrmMemOffs : Format<3>;
def RawFrmSrc : Format<4>;
def RawFrmDst : Format<5>;
def RawFrmDstSrc : Format<6>;
def RawFrmImm8 : Format<7>;
def RawFrmImm16 : Format<8>;
def MRMDestMem : Format<32>;
def MRMSrcMem : Format<33>;
def MRMSrcMem4VOp3 : Format<34>;
def MRMSrcMemOp4 : Format<35>;
def MRMXm : Format<39>;
def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
def MRM6m : Format<46>; def MRM7m : Format<47>;
def MRMDestReg : Format<48>;
def MRMSrcReg : Format<49>;
def MRMSrcReg4VOp3 : Format<50>;
def MRMSrcRegOp4 : Format<51>;
def MRMXr : Format<55>;
def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
def MRM6r : Format<62>; def MRM7r : Format<63>;
def MRM_C0 : Format<64>; def MRM_C1 : Format<65>; def MRM_C2 : Format<66>;
def MRM_C3 : Format<67>; def MRM_C4 : Format<68>; def MRM_C5 : Format<69>;
def MRM_C6 : Format<70>; def MRM_C7 : Format<71>; def MRM_C8 : Format<72>;
def MRM_C9 : Format<73>; def MRM_CA : Format<74>; def MRM_CB : Format<75>;
def MRM_CC : Format<76>; def MRM_CD : Format<77>; def MRM_CE : Format<78>;
def MRM_CF : Format<79>; def MRM_D0 : Format<80>; def MRM_D1 : Format<81>;
def MRM_D2 : Format<82>; def MRM_D3 : Format<83>; def MRM_D4 : Format<84>;
def MRM_D5 : Format<85>; def MRM_D6 : Format<86>; def MRM_D7 : Format<87>;
def MRM_D8 : Format<88>; def MRM_D9 : Format<89>; def MRM_DA : Format<90>;
def MRM_DB : Format<91>; def MRM_DC : Format<92>; def MRM_DD : Format<93>;
def MRM_DE : Format<94>; def MRM_DF : Format<95>; def MRM_E0 : Format<96>;
def MRM_E1 : Format<97>; def MRM_E2 : Format<98>; def MRM_E3 : Format<99>;
def MRM_E4 : Format<100>; def MRM_E5 : Format<101>; def MRM_E6 : Format<102>;
def MRM_E7 : Format<103>; def MRM_E8 : Format<104>; def MRM_E9 : Format<105>;
def MRM_EA : Format<106>; def MRM_EB : Format<107>; def MRM_EC : Format<108>;
def MRM_ED : Format<109>; def MRM_EE : Format<110>; def MRM_EF : Format<111>;
def MRM_F0 : Format<112>; def MRM_F1 : Format<113>; def MRM_F2 : Format<114>;
def MRM_F3 : Format<115>; def MRM_F4 : Format<116>; def MRM_F5 : Format<117>;
def MRM_F6 : Format<118>; def MRM_F7 : Format<119>; def MRM_F8 : Format<120>;
def MRM_F9 : Format<121>; def MRM_FA : Format<122>; def MRM_FB : Format<123>;
def MRM_FC : Format<124>; def MRM_FD : Format<125>; def MRM_FE : Format<126>;
def MRM_FF : Format<127>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
// machine code emitter.
class ImmType<bits<4> val> {
bits<4> Value = val;
}
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
def Imm8Reg : ImmType<3>; // Register encoded in [7:4].
def Imm16 : ImmType<4>;
def Imm16PCRel : ImmType<5>;
def Imm32 : ImmType<6>;
def Imm32PCRel : ImmType<7>;
def Imm32S : ImmType<8>;
def Imm64 : ImmType<9>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
class FPFormat<bits<3> val> {
bits<3> Value = val;
}
def NotFP : FPFormat<0>;
def ZeroArgFP : FPFormat<1>;
def OneArgFP : FPFormat<2>;
def OneArgFPRW : FPFormat<3>;
def TwoArgFP : FPFormat<4>;
def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
// Keep in sync with tables in X86InstrInfo.cpp.
class Domain<bits<2> val> {
bits<2> Value = val;
}
def GenericDomain : Domain<0>;
def SSEPackedSingle : Domain<1>;
def SSEPackedDouble : Domain<2>;
def SSEPackedInt : Domain<3>;
// Class specifying the vector form of the decompressed
// 8-bit displacement (CDisp8).
class CD8VForm<bits<3> val> {
bits<3> Value = val;
}
def CD8VF : CD8VForm<0>; // v := VL
def CD8VH : CD8VForm<1>; // v := VL/2
def CD8VQ : CD8VForm<2>; // v := VL/4
def CD8VO : CD8VForm<3>; // v := VL/8
// The tuple (subvector) forms.
def CD8VT1 : CD8VForm<4>; // v := 1
def CD8VT2 : CD8VForm<5>; // v := 2
def CD8VT4 : CD8VForm<6>; // v := 4
def CD8VT8 : CD8VForm<7>; // v := 8
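// Worked example (see the CD8_Scale computation in X86Inst below): for a
// 512-bit vector, VL is 64 bytes, so CD8VF scales disp8 by 64, CD8VH by 32,
// CD8VQ by 16 and CD8VO by 8; the tuple forms instead scale by 1, 2, 4 or 8
// elements, i.e. by a multiple of CD8_EltSize.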
// Class specifying the prefix used as an opcode extension.
class Prefix<bits<3> val> {
bits<3> Value = val;
}
def NoPrfx : Prefix<0>;
def PD : Prefix<1>;
def XS : Prefix<2>;
def XD : Prefix<3>;
def PS : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
// that other instructions with this opcode use PD/XS/XD
// and if any of those is not supported they shouldn't
// decode to this instruction. e.g. ANDSS/ANDSD don't
// exist, but the 0xf2/0xf3 encoding shouldn't
// decode to ANDPS.
// Class specifying the opcode map.
class Map<bits<3> val> {
bits<3> Value = val;
}
def OB : Map<0>;
def TB : Map<1>;
def T8 : Map<2>;
def TA : Map<3>;
def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
// Class specifying the encoding.
class Encoding<bits<2> val> {
bits<2> Value = val;
}
def EncNormal : Encoding<0>;
def EncVEX : Encoding<1>;
def EncXOP : Encoding<2>;
def EncEVEX : Encoding<3>;
// Operand size for encodings that change based on mode.
class OperandSize<bits<2> val> {
bits<2> Value = val;
}
def OpSizeFixed : OperandSize<0>; // Never needs a 0x66 prefix.
def OpSize16 : OperandSize<1>; // Needs 0x66 prefix in 32-bit mode.
def OpSize32 : OperandSize<2>; // Needs 0x66 prefix in 16-bit mode.
// Address size for encodings that change based on mode.
class AddressSize<bits<2> val> {
bits<2> Value = val;
}
def AdSizeX : AddressSize<0>; // Address size determined using addr operand.
def AdSize16 : AddressSize<1>; // Encodes a 16-bit address.
def AdSize32 : AddressSize<2>; // Encodes a 32-bit address.
def AdSize64 : AddressSize<3>; // Encodes a 64-bit address.
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize16 { OperandSize OpSize = OpSize16; }
class OpSize32 { OperandSize OpSize = OpSize32; }
class AdSize16 { AddressSize AdSize = AdSize16; }
class AdSize32 { AddressSize AdSize = AdSize32; }
class AdSize64 { AddressSize AdSize = AdSize64; }
class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class REP { bit hasREPPrefix = 1; }
class TB { Map OpMap = TB; }
class T8 { Map OpMap = T8; }
class TA { Map OpMap = TA; }
class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
class XD : TB { Prefix OpPrefix = XD; }
class XS : TB { Prefix OpPrefix = XS; }
class T8PS : T8 { Prefix OpPrefix = PS; }
class T8PD : T8 { Prefix OpPrefix = PD; }
class T8XD : T8 { Prefix OpPrefix = XD; }
class T8XS : T8 { Prefix OpPrefix = XS; }
class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bits<2> VEX_WPrefix = 1; }
class VEX_WIG { bits<2> VEX_WPrefix = 2; }
// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
// FIXME: We should consider adding separate bits for VEX_WIG and the extra
// part of W1X. This would probably simplify the tablegen emitters and
// the TSFlags creation below.
class VEX_W1X { bits<2> VEX_WPrefix = 3; }
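// Summary of the VEX_WPrefix encodings defined above: 0 = W0 (the default in
// X86Inst below), 1 = VEX_W (W1), 2 = VEX_WIG (W ignored), 3 = VEX_W1X (W1
// that may be relaxed to W0 by EVEX2VEX).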
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class EVEX { Encoding OpEnc = EncEVEX; }
class EVEX_4V : EVEX { bit hasVEX_4V = 1; }
class EVEX_K { bit hasEVEX_K = 1; }
class EVEX_KZ : EVEX_K { bit hasEVEX_Z = 1; }
class EVEX_B { bit hasEVEX_B = 1; }
class EVEX_RC { bit hasEVEX_RC = 1; }
class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; }
class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; }
class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; }
class NOTRACK { bit hasNoTrackPrefix = 1; }
// Specify AVX512 8-bit compressed displacement encoding based on the vector
// element size in bits (8, 16, 32, 64) and the CDisp8 form.
class EVEX_CD8<int esize, CD8VForm form> {
int CD8_EltSize = !srl(esize, 3);
bits<3> CD8_Form = form.Value;
}
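// Illustrative sketch only (no such instruction is defined in this file):
// an AVX-512 class mixes EVEX_CD8 in alongside its other adjectives, e.g.
//   class ExampleCD8I<bits<8> o, Format F, dag outs, dag ins, string asm>
//     : AVX512PDI<o, F, outs, ins, asm, []>, EVEX_CD8<32, CD8VF>;
// giving CD8_EltSize = 4 (bytes) and CD8_Form = CD8VF for the disp8 scaling
// computed in X86Inst below.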
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
// Specify the alternative register form instruction to replace the current
// instruction in case it was picked during generation of memory folding tables
class FoldGenData<string _RegisterForm> {
string FoldGenRegForm = _RegisterForm;
}
// Provide a specific instruction to be used by the EVEX2VEX conversion.
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
// Mark the instruction as "illegal to memory fold/unfold"
class NotMemoryFoldable { bit isMemoryFoldable = 0; }
// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
bits<8> Opcode = opcod;
Format Form = f;
bits<7> FormBits = Form.Value;
ImmType ImmT = i;
dag OutOperandList = outs;
dag InOperandList = ins;
string AsmString = AsmStr;
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
//
// Attributes specific to X86 instructions...
//
bit ForceDisassemble = 0; // Force instruction to disassemble even though it's
// isCodeGenOnly. Needed to hide an ambiguous
// AsmString from the parser, but still disassemble.
OperandSize OpSize = OpSizeFixed; // Does this instruction's encoding change
// based on operand size of the mode?
bits<2> OpSizeBits = OpSize.Value;
AddressSize AdSize = AdSizeX; // Does this instruction's encoding change
// based on address size of the mode?
bits<2> AdSizeBits = AdSize.Value;
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
bits<3> OpMapBits = OpMap.Value;
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
Domain ExeDomain = d;
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit?
bit hasEVEX_K = 0; // Does this inst require masking?
bit hasEVEX_Z = 0; // Does this inst set the EVEX_Z field?
bit hasEVEX_L2 = 0; // Does this inst set the EVEX_L2 field?
bit hasEVEX_B = 0; // Does this inst set the EVEX_B field?
bits<3> CD8_Form = 0; // Compressed disp8 form - vector-width.
// Declare it int rather than bits<4> so that all bits are defined when
// assigning to bits<7>.
int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes.
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
bit hasNoTrackPrefix = 0; // Does this inst have a 0x3E (NoTrack) prefix?
bits<2> EVEX_LL;
let EVEX_LL{0} = hasVEX_L;
let EVEX_LL{1} = hasEVEX_L2;
// Vector size in bytes.
bits<7> VectSize = !shl(16, EVEX_LL);
// The scaling factor for AVX512's compressed displacement is either
// - the size of a power-of-two number of elements or
// - the size of a single element for broadcasts or
// - the total vector size divided by a power-of-two number.
// Possible values are: 0 (non-AVX512 inst), 1, 2, 4, 8, 16, 32 and 64.
bits<7> CD8_Scale = !if (!eq (OpEnc.Value, EncEVEX.Value),
!if (CD8_Form{2},
!shl(CD8_EltSize, CD8_Form{1-0}),
!if (hasEVEX_B,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
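// Worked examples of the expression above (EVEX encodings only):
// - a full 512-bit form (CD8VF, hasEVEX_L2): VectSize = 64, scale = 64 >> 0 = 64
// - a broadcast form (hasEVEX_B) with 32-bit elements: scale = CD8_EltSize = 4
// - a tuple form CD8VT4 with 32-bit elements: scale = 4 << 2 = 16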
// Used in the memory folding generation (TableGen backend) to point to an alternative
// instruction to replace the current one in case it got picked during generation.
string FoldGenRegForm = ?;
// Used to prevent an explicit EVEX2VEX override for this instruction.
string EVEX2VEXOverride = ?;
bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
// TSFlags layout should be kept in sync with X86BaseInfo.h.
let TSFlags{6-0} = FormBits;
let TSFlags{8-7} = OpSizeBits;
let TSFlags{10-9} = AdSizeBits;
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
let TSFlags{15-13} = OpMapBits;
let TSFlags{16} = hasREX_WPrefix;
let TSFlags{20-17} = ImmT.Value;
let TSFlags{23-21} = FPForm.Value;
let TSFlags{24} = hasLockPrefix;
let TSFlags{25} = hasREPPrefix;
let TSFlags{27-26} = ExeDomain.Value;
let TSFlags{29-28} = OpEncBits;
let TSFlags{37-30} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
let TSFlags{38} = VEX_WPrefix{0};
let TSFlags{39} = hasVEX_4V;
let TSFlags{40} = hasVEX_L;
let TSFlags{41} = hasEVEX_K;
let TSFlags{42} = hasEVEX_Z;
let TSFlags{43} = hasEVEX_L2;
let TSFlags{44} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
let TSFlags{51-45} = CD8_Scale;
let TSFlags{52} = hasEVEX_RC;
let TSFlags{53} = hasNoTrackPrefix;
}
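// Worked example of the TSFlags packing above, for a hypothetical RawFrm
// instruction with opcode 0x90 and no prefixes: TSFlags{6-0} = 1 (RawFrm),
// TSFlags{37-30} = 0x90, and every other field keeps its zero default.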
class PseudoI<dag oops, dag iops, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: X86Inst<o, f, Imm8Reg, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32S<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32S, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii64<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm64, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// FPStack Instruction Templates:
// FPI - Floating Point Instruction template.
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: PseudoI<outs, ins, pattern> {
let FPForm = fp;
}
// Templates for instructions that use a 16- or 32-bit segmented address as
// their only operand: lcall (FAR CALL) and ljmp (FAR JMP)
//
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
// SI - SSE 1 & 2 scalar instructions
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
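// For example, following the !if chain above: a normally-encoded SI with the
// XS prefix gets Predicates = [UseSSE1], the same instruction in VEX form
// gets [UseAVX], and in EVEX form it gets [HasAVX512].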
// SI_Int - SSE 1 & 2 scalar intrinsics - vex form available on AVX512
class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
[UseSSE2])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// PI - SSE 1 & 2 packed instructions
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// MMXPI - SSE 1 & 2 packed instructions with MMX operands
class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
Domain d>
: I<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpPrefix.Value, PD.Value), [HasMMX, HasSSE2],
[HasMMX, HasSSE1]);
}
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
!if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
[UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm),
!if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm),
asm));
}
// SSE1 Instruction Templates:
//
// SSI - SSE1 instructions with XS prefix.
// PSI - SSE1 instructions with PS prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
// VSSI - SSE1 instructions with XS prefix in AVX form.
// VPSI - SSE1 instructions with PS prefix in AVX form, packed single.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[UseSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, PS,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
// S2SI - SSE2 instructions with XS prefix.
// S2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with PD prefix, packed double domain.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and PD prefix.
// VSDI - SSE2 scalar instructions with XD prefix in AVX form.
// VPDI - SSE2 vector instructions with PD prefix in AVX form,
// packed double domain.
// VS2I - SSE2 scalar instructions with PD prefix in AVX form.
// S2I - SSE2 scalar instructions with PD prefix.
// MMXSDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix as well as
// MMX operands.
// MMXS2SIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix as well as
// MMX operands.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[UseSSE2]>;
class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
PD, Requires<[HasAVX]>;
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern>, PD,
Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[UseSSE2]>;
class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX, HasSSE2]>;
class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX, HasSSE2]>;
// SSE3 Instruction Templates:
//
// S3I - SSE3 instructions with PD prefixes.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
// MMXSS3AI - SSSE3 instructions with TA prefix and MMX operands.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
// uses the MMX registers. The 64-bit versions are grouped with the MMX
// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSSE3]>;
class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PS,
Requires<[HasMMX, HasSSSE3]>;
class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPS,
Requires<[HasMMX, HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
// SS48I - SSE 4.1 instructions with T8 prefix.
// SS4AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
// SS42FI - SSE 4.2 instructions with T8XD prefix.
// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
// SS42AI - SSE 4.2 instructions with TA prefix.
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
// AVX8I - AVX instructions with T8PD prefix.
// AVXAIi8 - AVX instructions with TAPD prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX]>;
// AVX2 Instruction Templates:
// Instructions introduced in AVX2 (no SSE equivalent forms)
//
// AVX28I - AVX2 instructions with T8PD prefix.
// AVX2AIi8 - AVX2 instructions with TAPD prefix and ImmT = Imm8.
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX2]>;
class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX2]>;
// AVX-512 Instruction Templates:
// Instructions introduced in AVX-512 (no SSE equivalent forms)
//
// AVX5128I - AVX-512 instructions with T8PD prefix.
// AVX512AIi8 - AVX-512 instructions with TAPD prefix and ImmT = Imm8.
// AVX512PDI - AVX-512 instructions with PD, double packed.
// AVX512PSI - AVX-512 instructions with PS, single packed.
// AVX512XS8I - AVX-512 instructions with T8 and XS prefixes.
// AVX512XSI - AVX-512 instructions with XS prefix, generic domain.
// AVX512BI - AVX-512 instructions with PD, int packed domain.
// AVX512SI - AVX-512 scalar instructions with PD prefix.
class AVX5128I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[HasAVX512]>;
class AVX5128IBase : T8PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512XS8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8XS,
Requires<[HasAVX512]>;
class AVX512XSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS,
Requires<[HasAVX512]>;
class AVX512XDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, XD,
Requires<[HasAVX512]>;
class AVX512BI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIBase : PD {
Domain ExeDomain = SSEPackedInt;
}
class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, PD,
Requires<[HasAVX512]>;
class AVX512BIi8Base : PD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XSIi8Base : XS {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512XDIi8Base : XD {
Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512PSIi8Base : PS {
Domain ExeDomain = SSEPackedSingle;
ImmType ImmT = Imm8;
}
class AVX512PDIi8Base : PD {
Domain ExeDomain = SSEPackedDouble;
ImmType ImmT = Imm8;
}
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, PD,
Requires<[HasAVX512]>;
class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, PS,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d>
: I<o, F, outs, ins, asm, pattern, d>, Requires<[HasAVX512]>;
class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
class AVX512FMA3Base : T8PD, EVEX_4V;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, Requires<[HasAVX512]>;
// AES Instruction Templates:
//
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[NoAVX, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[NoAVX, HasAES]>;
// PCLMUL Instruction Templates
class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoVLX]>;
class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern>, TAPD,
VEX_4V, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP9, Requires<[HasXOP]>;
// XOP 2 and 3 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 4 Operand Instruction Templates with imm byte
class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8Reg<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
VEX_4V, Requires<[HasXOP]>;
// X86-64 Instruction templates...
//
class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, REX_W;
class RIi16 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii16<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32<o, F, outs, ins, asm, pattern>, REX_W;
class RIi32S <bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii32S<o, F, outs, ins, asm, pattern>, REX_W;
class RIi64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii64<o, F, outs, ins, asm, pattern>, REX_W;
class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: S2I<o, F, outs, ins, asm, pattern>, REX_W;
class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: VS2I<o, F, outs, ins, asm, pattern>, VEX_W;
// MMX Instruction templates
//
// MMXI - MMX instructions with TB prefix.
// MMXI32 - MMX instructions with TB prefix valid only in 32-bit mode.
// MMXI64 - MMX instructions with TB prefix valid only in 64-bit mode.
// MMX2I - MMX / SSE2 instructions with PD prefix.
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,Not64BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PS, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, PD, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, PS, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,612 @@
//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MMX instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
// All instructions that use MMX should be in this file, even if they also use
// SSE.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// MMX Multiclasses
//===----------------------------------------------------------------------===//
// Alias instruction that maps zero vector to pxor mmx.
// This is expanded by ExpandPostRAPseudos to a pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def MMX_SET0 : I<0, Pseudo, (outs VR64:$dst), (ins), "", []>;
}
let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
X86FoldableSchedWrite sched, bit Commutable = 0,
X86MemOperand OType = i64mem> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]> {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr, Intrinsic IntId,
Intrinsic IntId2, X86FoldableSchedWrite sched,
X86FoldableSchedWrite schedImm> {
def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst),
(ins VR64:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId2 VR64:$src1, imm:$src2))]>,
Sched<[schedImm]>;
}
}
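// Reading MMXI_binop_rmi_int above: the rr/rm forms take the shift amount
// from an MMX register or memory (IntId), while the ri form takes an
// immediate (IntId2); e.g. MMX_PSRLW below pairs int_x86_mmx_psrl_w with
// int_x86_mmx_psrli_w.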
/// Unary MMX instructions requiring SSSE3.
multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched> {
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (load_mmx addr:$src))))]>,
Sched<[sched.Folded]>;
}
/// Binary MMX instructions requiring SSSE3.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
Intrinsic IntId64, X86FoldableSchedWrite sched,
bit Commutable = 0> {
let isCommutable = Commutable in
def rr : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>,
Sched<[sched]>;
def rm : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst,
(IntId64 VR64:$src1,
(bitconvert (load_mmx addr:$src2))))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
}
/// PALIGN MMX instructions (require SSSE3).
multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
X86FoldableSchedWrite sched> {
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, X86FoldableSchedWrite sched, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
[(set DstRC:$dst, (Int SrcRC:$src))], d>,
Sched<[sched]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>,
Sched<[sched.Folded]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst),
(ins DstRC:$src1, SrcRC:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>,
Sched<[WriteCvtI2PS]>;
def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>,
Sched<[WriteCvtI2PS.Folded]>;
}
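// The 3addr variant reads its destination as $src1; the "$src1 = $dst" tie
// is supplied by the Constraints at the instantiation site (see
// MMX_CVTPI2PS below).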
//===----------------------------------------------------------------------===//
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
let SchedRW = [WriteEMMS] in
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
//===----------------------------------------------------------------------===//
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))]>,
Sched<[WriteVecMoveFromGpr]>;
def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))]>,
Sched<[WriteVecLoad]>;
let Predicates = [HasMMX] in {
def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (MMX_X86movw2d (i32 0))),
(MMX_SET0)>;
def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
(MMX_MOVD64rm addr:$src)>;
}
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteVecStore]>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
(MMX_X86movd2w (x86mmx VR64:$src)))]>,
Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))]>,
Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
(ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}",
[]>, Sched<[SchedWriteVecMoveLS.MMX.RM]>;
let isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert VR64:$src))]>,
Sched<[WriteVecMoveToGpr]>;
let SchedRW = [WriteVecMove], hasSideEffects = 0, isMoveReg = 1 in {
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
FoldGenData<"MMX_MOVQ64rr">;
} // SchedRW, hasSideEffects, isMoveReg
} // isBitcast
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem,
(outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.MMX.MR]>;
let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
} // SchedRW
let SchedRW = [SchedWriteVecMoveLS.MMX.MR] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))))))]>;
def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64
(scalar_to_vector
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[]>;
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[]>;
}
} // SchedRW
let Predicates = [HasMMX, HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
// movd to MMX register zero-extends
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
(MMX_MOVD64rr GR32:$src)>;
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(MMX_MOVD64rm addr:$src)>;
}
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
SchedWriteVecALU.MMX>;
defm MMX_PABSW : SS3I_unop_rm_int_mm<0x1D, "pabsw", int_x86_ssse3_pabs_w,
SchedWriteVecALU.MMX>;
defm MMX_PABSD : SS3I_unop_rm_int_mm<0x1E, "pabsd", int_x86_ssse3_pabs_d,
SchedWriteVecALU.MMX>;
// -- Addition
defm MMX_PADDB : MMXI_binop_rm_int<0xFC, "paddb", int_x86_mmx_padd_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
SchedWriteVecALU.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDSW : MMXI_binop_rm_int<0xED, "paddsw" , int_x86_mmx_padds_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSB : MMXI_binop_rm_int<0xDC, "paddusb", int_x86_mmx_paddus_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PADDUSW : MMXI_binop_rm_int<0xDD, "paddusw", int_x86_mmx_paddus_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PHADDW : SS3I_binop_rm_int_mm<0x01, "phaddw", int_x86_ssse3_phadd_w,
SchedWritePHAdd.MMX>;
defm MMX_PHADDD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
SchedWritePHAdd.MMX>;
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
SchedWritePHAdd.MMX>;
// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
SchedWriteVecALU.MMX>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSB : MMXI_binop_rm_int<0xE8, "psubsb" , int_x86_mmx_psubs_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBSW : MMXI_binop_rm_int<0xE9, "psubsw" , int_x86_mmx_psubs_w,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSB : MMXI_binop_rm_int<0xD8, "psubusb", int_x86_mmx_psubus_b,
SchedWriteVecALU.MMX>;
defm MMX_PSUBUSW : MMXI_binop_rm_int<0xD9, "psubusw", int_x86_mmx_psubus_w,
SchedWriteVecALU.MMX>;
defm MMX_PHSUBW : SS3I_binop_rm_int_mm<0x05, "phsubw", int_x86_ssse3_phsub_w,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBD : SS3I_binop_rm_int_mm<0x06, "phsubd", int_x86_ssse3_phsub_d,
SchedWritePHAdd.MMX>;
defm MMX_PHSUBSW : SS3I_binop_rm_int_mm<0x07, "phsubsw",int_x86_ssse3_phsub_sw,
SchedWritePHAdd.MMX>;
// -- Multiplication
defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
SchedWriteVecIMul.MMX, 1>;
let Predicates = [HasMMX, HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
SchedWriteVecIMul.MMX, 1>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
SchedWriteVecIMul.MMX, 1>;
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw,
SchedWriteVecIMul.MMX>;
let Predicates = [HasMMX, HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINUB : MMXI_binop_rm_int<0xDA, "pminub", int_x86_mmx_pminu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMINSW : MMXI_binop_rm_int<0xEA, "pminsw", int_x86_mmx_pmins_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXUB : MMXI_binop_rm_int<0xDE, "pmaxub", int_x86_mmx_pmaxu_b,
SchedWriteVecALU.MMX, 1>;
defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
SchedWriteVecALU.MMX, 1>;
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
SchedWritePSADBW.MMX, 1>;
}
defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
SchedWriteVecALU.MMX>;
defm MMX_PSIGNW : SS3I_binop_rm_int_mm<0x09, "psignw", int_x86_ssse3_psign_w,
SchedWriteVecALU.MMX>;
defm MMX_PSIGND : SS3I_binop_rm_int_mm<0x0A, "psignd", int_x86_ssse3_psign_d,
SchedWriteVecALU.MMX>;
let Constraints = "$src1 = $dst" in
defm MMX_PALIGNR : ssse3_palign_mm<"palignr", int_x86_mmx_palignr_b,
SchedWriteShuffle.MMX>;
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_int<0xDB, "pand", int_x86_mmx_pand,
SchedWriteVecLogic.MMX, 1>;
defm MMX_POR : MMXI_binop_rm_int<0xEB, "por" , int_x86_mmx_por,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PXOR : MMXI_binop_rm_int<0xEF, "pxor", int_x86_mmx_pxor,
SchedWriteVecLogic.MMX, 1>;
defm MMX_PANDN : MMXI_binop_rm_int<0xDF, "pandn", int_x86_mmx_pandn,
SchedWriteVecLogic.MMX>;
// Shift Instructions
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
int_x86_mmx_psrl_w, int_x86_mmx_psrli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
int_x86_mmx_psrl_d, int_x86_mmx_psrli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
int_x86_mmx_psll_d, int_x86_mmx_pslli_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
SchedWriteVecShift.MMX,
SchedWriteVecShiftImm.MMX>;
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQW : MMXI_binop_rm_int<0x75, "pcmpeqw", int_x86_mmx_pcmpeq_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPEQD : MMXI_binop_rm_int<0x76, "pcmpeqd", int_x86_mmx_pcmpeq_d,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTB : MMXI_binop_rm_int<0x64, "pcmpgtb", int_x86_mmx_pcmpgt_b,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTW : MMXI_binop_rm_int<0x65, "pcmpgtw", int_x86_mmx_pcmpgt_w,
SchedWriteVecALU.MMX>;
defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d,
SchedWriteVecALU.MMX>;
// -- Unpack Instructions
defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
int_x86_mmx_punpckhbw,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
int_x86_mmx_punpckhwd,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
int_x86_mmx_punpckhdq,
SchedWriteShuffle.MMX>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
SchedWriteShuffle.MMX,
0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
int_x86_mmx_punpckldq,
SchedWriteShuffle.MMX,
0, i32mem>;
// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,
SchedWriteShuffle.MMX>;
defm MMX_PACKSSDW : MMXI_binop_rm_int<0x6B, "packssdw", int_x86_mmx_packssdw,
SchedWriteShuffle.MMX>;
defm MMX_PACKUSWB : MMXI_binop_rm_int<0x67, "packuswb", int_x86_mmx_packuswb,
SchedWriteShuffle.MMX>;
// -- Shuffle Instructions
defm MMX_PSHUFB : SS3I_binop_rm_int_mm<0x00, "pshufb", int_x86_ssse3_pshuf_b,
SchedWriteVarShuffle.MMX>;
def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
WriteCvtPS2I, SSEPackedSingle>, PS;
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
WriteCvtPD2I, SSEPackedDouble>, PD;
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
WriteCvtI2PD, SSEPackedDouble>, PD;
let Constraints = "$src1 = $dst" in {
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
int_x86_sse_cvtpi2ps,
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
}
// Extract / Insert
let Predicates = [HasMMX, HasSSE1] in
def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
imm:$src2))]>,
Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasMMX, HasSSE1] in {
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteVecInsert]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
(ins VR64:$src1, i16mem:$src2, i32u8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}
// Mask creation
let Predicates = [HasMMX, HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>,
Sched<[WriteMMXMOVMSK]>;
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
def : Pat<(x86mmx (MMX_X86movdq2q VR128:$src)),
(x86mmx (MMX_MOVDQ2Qrr VR128:$src))>;
def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
(x86mmx (MMX_MOVQ64rm addr:$src))>;
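// The SDTypeProfile above reads: one result and one operand, with the result
// constrained to x86mmx and the operand to v2i64, modeling the move of the
// low 64 bits of an XMM value into an MMX register.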
// Misc.
let SchedRW = [SchedWriteShuffle.MMX] in {
let Uses = [EDI], Predicates = [HasMMX, HasSSE1,Not64BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
let Uses = [RDI], Predicates = [HasMMX, HasSSE1, In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
}
// 64-bit bit convert.
let Predicates = [HasMMX, HasSSE2] in {
def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))),
(MMX_CVTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
(MMX_CVTTPS2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
(MMX_CVTPD2PIirr VR128:$src)>;
def : Pat<(x86mmx (MMX_X86movdq2q
(bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
(MMX_CVTTPD2PIirr VR128:$src)>;
}


@@ -0,0 +1,80 @@
//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 MPX instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
// FIXME: Investigate a better scheduler class once MPX is used inside LLVM.
let SchedRW = [WriteSystem] in {
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
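// For illustration, a rough hand-expansion of the defm above (a sketch only;
// names and field order are approximate):
//
//   def BNDMK32rm : I<0x1B, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
//                     "bndmk\t{$src, $dst|$dst, $src}", []>, XS,
//                     Requires<[HasMPX, Not64BitMode]>;
//   def BNDMK64rm : I<0x1B, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
//                     "bndmk\t{$src, $dst|$dst, $src}", []>, XS,
//                     Requires<[HasMPX, In64BitMode]>;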
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayLoad = 1 in {
def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
}
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>, NotMemoryFoldable;
let mayStore = 1 in {
def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>, NotMemoryFoldable;
def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>, NotMemoryFoldable;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
"bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
}
let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
"bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
} // SchedRW


@@ -0,0 +1,30 @@
//===-- X86InstrSGX.td - SGX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel SGX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SGX instructions
let SchedRW = [WriteSystem], Predicates = [HasSGX] in {
// ENCLS - Execute an Enclave System Function of Specified Leaf Number
def ENCLS : I<0x01, MRM_CF, (outs), (ins),
"encls", []>, TB;
// ENCLU - Execute an Enclave User Function of Specified Leaf Number
def ENCLU : I<0x01, MRM_D7, (outs), (ins),
"enclu", []>, TB;
// ENCLV - Execute an Enclave VMM Function of Specified Leaf Number
def ENCLV : I<0x01, MRM_C0, (outs), (ins),
"enclv", []>, TB;
} // SchedRW

File diff suppressed because it is too large


@@ -0,0 +1,63 @@
//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the AMD SVM instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SVM instructions
let SchedRW = [WriteSystem] in {
// 0F 01 D9
def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
// 0F 01 DC
def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
// 0F 01 DD
def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
// 0F 01 DE
let Uses = [EAX] in
def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|eax}", []>, TB;
// 0F 01 D8
let Uses = [EAX] in
def VMRUN32 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMRUN64 : I<0x01, MRM_D8, (outs), (ins), "vmrun\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DA
let Uses = [EAX] in
def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins), "vmload\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DB
let Uses = [EAX] in
def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%eax|eax}", []>, TB,
Requires<[Not64BitMode]>;
let Uses = [RAX] in
def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins), "vmsave\t{%rax|rax}", []>, TB,
Requires<[In64BitMode]>;
// 0F 01 DF
let Uses = [EAX, ECX] in
def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%eax, %ecx|eax, ecx}", []>, TB, Requires<[Not64BitMode]>;
let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
"invlpga\t{%rax, %ecx|rax, ecx}", []>, TB, Requires<[In64BitMode]>;
} // SchedRW

File diff suppressed because it is too large


@@ -0,0 +1,743 @@
//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
// privileged modes. These are not typically used by the compiler, but are
// supported for the assembler and disassembler.
//
//===----------------------------------------------------------------------===//
let SchedRW = [WriteSystem] in {
let Defs = [RAX, RDX] in
def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB;
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
// CPU flow control instructions
let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
def UD2 : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
def UD1 : I<0xB9, RawFrm, (outs), (ins), "ud1", []>, TB;
def UD0 : I<0xFF, RawFrm, (outs), (ins), "ud0", []>, TB;
}
def HLT : I<0xF4, RawFrm, (outs), (ins), "hlt", []>;
def RSM : I<0xAA, RawFrm, (outs), (ins), "rsm", []>, TB;
// Interrupt and SysCall Instructions.
let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>, Requires<[Not64BitMode]>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
} // SchedRW
// The long form of "int $3" turns into int3 as a size optimization.
// FIXME: This doesn't work because InstAlias can't match immediate constants.
//def : InstAlias<"int\t$3", (INT3)>;
let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
def SYSRET64 : RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
def SYSEXIT64 : RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
def : Pat<(debugtrap),
(INT3)>, Requires<[NotPS4]>;
def : Pat<(debugtrap),
(INT (i8 0x41))>, Requires<[IsPS4]>;
//===----------------------------------------------------------------------===//
// Input/Output Instructions.
//
let SchedRW = [WriteSystem] in {
let Defs = [AL], Uses = [DX] in
def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|al, dx}", []>;
let Defs = [AX], Uses = [DX] in
def IN16rr : I<0xED, RawFrm, (outs), (ins), "in{w}\t{%dx, %ax|ax, dx}", []>,
OpSize16;
let Defs = [EAX], Uses = [DX] in
def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", []>,
OpSize32;
let Defs = [AL] in
def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
"in{b}\t{$port, %al|al, $port}", []>;
let Defs = [AX] in
def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{w}\t{$port, %ax|ax, $port}", []>, OpSize16;
let Defs = [EAX] in
def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{l}\t{$port, %eax|eax, $port}", []>, OpSize32;
let Uses = [DX, AL] in
def OUT8rr : I<0xEE, RawFrm, (outs), (ins), "out{b}\t{%al, %dx|dx, al}", []>;
let Uses = [DX, AX] in
def OUT16rr : I<0xEF, RawFrm, (outs), (ins), "out{w}\t{%ax, %dx|dx, ax}", []>,
OpSize16;
let Uses = [DX, EAX] in
def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", []>,
OpSize32;
let Uses = [AL] in
def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
"out{b}\t{%al, $port|$port, al}", []>;
let Uses = [AX] in
def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{w}\t{%ax, $port|$port, ax}", []>, OpSize16;
let Uses = [EAX] in
def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{l}\t{%eax, $port|$port, eax}", []>, OpSize32;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from debug registers
let SchedRW = [WriteSystem] in {
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from control registers
let SchedRW = [WriteSystem] in {
def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[Not64BitMode]>;
def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB,
Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Segment override instruction prefixes
//let SchedRW = [WriteNop] in {
//def CS_PREFIX : I<0x2E, RawFrm, (outs), (ins), "cs", []>;
//def SS_PREFIX : I<0x36, RawFrm, (outs), (ins), "ss", []>;
//def DS_PREFIX : I<0x3E, RawFrm, (outs), (ins), "ds", []>;
//def ES_PREFIX : I<0x26, RawFrm, (outs), (ins), "es", []>;
//def FS_PREFIX : I<0x64, RawFrm, (outs), (ins), "fs", []>;
//def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>;
//} // SchedRW
//===----------------------------------------------------------------------===//
// Moves to and from segment registers.
//
let SchedRW = [WriteMove] in {
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Segmentation support instructions.
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize16, NotMemoryFoldable;
// i16mem operand in LAR32rm and GR32 operand in LAR32rr are not typos.
let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
OpSize32, NotMemoryFoldable;
// i16mem operand in LAR64rm and GR32 operand in LAR64rr are not typos.
let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize16, NotMemoryFoldable;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                "lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize16, NotMemoryFoldable;
// i16mem operand in LSL32rm and GR32 operand in LSL32rr are not typos.
let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize32, NotMemoryFoldable;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
                OpSize32, NotMemoryFoldable;
// i16mem operand in LSL64rm and GR32 operand in LSL64rr are not typos.
let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
                 "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
"str{w}\t$dst", []>, TB, OpSize16;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
"str{l}\t$dst", []>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
"str{q}\t$dst", []>, TB;
let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", []>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHCS32 : I<0x0E, RawFrm, (outs), (ins), "push{l}\t{%cs|cs}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHSS16 : I<0x16, RawFrm, (outs), (ins), "push{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHSS32 : I<0x16, RawFrm, (outs), (ins), "push{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHDS16 : I<0x1E, RawFrm, (outs), (ins), "push{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHDS32 : I<0x1E, RawFrm, (outs), (ins), "push{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHES16 : I<0x06, RawFrm, (outs), (ins), "push{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def PUSHES32 : I<0x06, RawFrm, (outs), (ins), "push{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS16 : I<0xa0, RawFrm, (outs), (ins), "push{w}\t{%fs|fs}", []>,
OpSize16, TB;
def PUSHFS32 : I<0xa0, RawFrm, (outs), (ins), "push{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHGS16 : I<0xa8, RawFrm, (outs), (ins), "push{w}\t{%gs|gs}", []>,
OpSize16, TB;
def PUSHGS32 : I<0xa8, RawFrm, (outs), (ins), "push{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def PUSHGS64 : I<0xa8, RawFrm, (outs), (ins), "push{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
// No "pop cs" instruction.
def POPSS16 : I<0x17, RawFrm, (outs), (ins), "pop{w}\t{%ss|ss}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPSS32 : I<0x17, RawFrm, (outs), (ins), "pop{l}\t{%ss|ss}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPDS16 : I<0x1F, RawFrm, (outs), (ins), "pop{w}\t{%ds|ds}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPDS32 : I<0x1F, RawFrm, (outs), (ins), "pop{l}\t{%ds|ds}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPES16 : I<0x07, RawFrm, (outs), (ins), "pop{w}\t{%es|es}", []>,
OpSize16, Requires<[Not64BitMode]>;
def POPES32 : I<0x07, RawFrm, (outs), (ins), "pop{l}\t{%es|es}", []>,
OpSize32, Requires<[Not64BitMode]>;
def POPFS16 : I<0xa1, RawFrm, (outs), (ins), "pop{w}\t{%fs|fs}", []>,
OpSize16, TB;
def POPFS32 : I<0xa1, RawFrm, (outs), (ins), "pop{l}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPFS64 : I<0xa1, RawFrm, (outs), (ins), "pop{q}\t{%fs|fs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def POPGS16 : I<0xa9, RawFrm, (outs), (ins), "pop{w}\t{%gs|gs}", []>,
OpSize16, TB;
def POPGS32 : I<0xa9, RawFrm, (outs), (ins), "pop{l}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[Not64BitMode]>;
def POPGS64 : I<0xa9, RawFrm, (outs), (ins), "pop{q}\t{%gs|gs}", []>, TB,
OpSize32, Requires<[In64BitMode]>;
def LDS16rm : I<0xc5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lds{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lds{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lss{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lss{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lss{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"les{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
Requires<[Not64BitMode]>;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"les{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
Requires<[Not64BitMode]>;
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lfs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lfs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lfs{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaquemem:$src),
"lgs{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
"lgs{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lgs\t{$src, $dst|$dst, $src}", []>, TB;
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in {
def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
}
} // SchedRW
//===----------------------------------------------------------------------===//
// Descriptor-table support instructions
let SchedRW = [WriteSystem] in {
def SGDT16m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SGDT32m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{l}\t$dst", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def SGDT64m : I<0x01, MRM0m, (outs), (ins opaquemem:$dst),
                "sgdt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def SIDT16m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{w}\t$dst", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def SIDT32m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{l}\t$dst", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def SIDT64m : I<0x01, MRM1m, (outs), (ins opaquemem:$dst),
                "sidt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
"sldt{w}\t$dst", []>, TB, OpSize16;
let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{w}\t$dst", []>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
"sldt{l}\t$dst", []>, OpSize32, TB;
// SLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
"sldt{q}\t$dst", []>, TB, Requires<[In64BitMode]>;
def LGDT16m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LGDT32m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LGDT64m : I<0x01, MRM2m, (outs), (ins opaquemem:$src),
"lgdt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LIDT16m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{w}\t$src", []>, TB, OpSize16, Requires<[Not64BitMode]>;
def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{l}\t$src", []>, OpSize32, TB, Requires<[Not64BitMode]>;
def LIDT64m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", []>, TB, NotMemoryFoldable;
} // SchedRW
//===----------------------------------------------------------------------===//
// Specialized register support
let SchedRW = [WriteSystem] in {
let Uses = [EAX, ECX, EDX] in
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", []>, TB;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
let Defs = [RAX, RDX], Uses = [ECX] in
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)]>, TB;
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", []>, OpSize16, TB;
def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
"smsw{l}\t$dst", []>, OpSize32, TB;
// no m form encodable; use SMSW16m
def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
"smsw{q}\t$dst", []>, TB;
// For memory operands, there is only a 16-bit form
def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
"smsw{w}\t$dst", []>, TB;
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// Cache instructions
let SchedRW = [WriteSystem] in {
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [(int_x86_wbinvd)]>, TB;
// WBNOINVD is like WBINVD, except without invalidation.
// Encoding: like WBINVD + a 0xF3 prefix.
def WBNOINVD : I<0x09, RawFrm, (outs), (ins), "wbnoinvd",
[(int_x86_wbnoinvd)]>, XS,
Requires<[HasWBNOINVD]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// CET instructions
// Use with caution; availability is not predicated on features.
let SchedRW = [WriteSystem] in {
let Uses = [SSP] in {
let Defs = [SSP] in {
def INCSSPD : I<0xAE, MRM5r, (outs), (ins GR32:$src), "incsspd\t$src",
[(int_x86_incsspd GR32:$src)]>, XS;
def INCSSPQ : RI<0xAE, MRM5r, (outs), (ins GR64:$src), "incsspq\t$src",
[(int_x86_incsspq GR64:$src)]>, XS;
} // Defs SSP
let Constraints = "$src = $dst" in {
def RDSSPD : I<0x1E, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"rdsspd\t$dst",
[(set GR32:$dst, (int_x86_rdsspd GR32:$src))]>, XS;
def RDSSPQ : RI<0x1E, MRM1r, (outs GR64:$dst), (ins GR64:$src),
"rdsspq\t$dst",
[(set GR64:$dst, (int_x86_rdsspq GR64:$src))]>, XS;
}
let Defs = [SSP] in {
def SAVEPREVSSP : I<0x01, MRM_EA, (outs), (ins), "saveprevssp",
[(int_x86_saveprevssp)]>, XS;
def RSTORSSP : I<0x01, MRM5m, (outs), (ins i32mem:$src),
"rstorssp\t$src",
[(int_x86_rstorssp addr:$src)]>, XS;
} // Defs SSP
} // Uses SSP
def WRSSD : I<0xF6, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrssd\t{$src, $dst|$dst, $src}",
[(int_x86_wrssd GR32:$src, addr:$dst)]>, T8PS;
def WRSSQ : RI<0xF6, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrssq\t{$src, $dst|$dst, $src}",
[(int_x86_wrssq GR64:$src, addr:$dst)]>, T8PS;
def WRUSSD : I<0xF5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"wrussd\t{$src, $dst|$dst, $src}",
[(int_x86_wrussd GR32:$src, addr:$dst)]>, T8PD;
def WRUSSQ : RI<0xF5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"wrussq\t{$src, $dst|$dst, $src}",
[(int_x86_wrussq GR64:$src, addr:$dst)]>, T8PD;
let Defs = [SSP] in {
let Uses = [SSP] in {
def SETSSBSY : I<0x01, MRM_E8, (outs), (ins), "setssbsy",
[(int_x86_setssbsy)]>, XS;
} // Uses SSP
def CLRSSBSY : I<0xAE, MRM6m, (outs), (ins i32mem:$src),
"clrssbsy\t$src",
[(int_x86_clrssbsy addr:$src)]>, XS;
} // Defs SSP
} // SchedRW
let SchedRW = [WriteSystem] in {
def ENDBR64 : I<0x1E, MRM_FA, (outs), (ins), "endbr64", []>, XS;
def ENDBR32 : I<0x1E, MRM_FB, (outs), (ins), "endbr32", []>, XS;
} // SchedRW
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins),
"xsetbv",
[(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
} // HasXSAVE
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave\t$dst",
[(int_x86_xsave addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
"xsave64\t$dst",
[(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor\t$dst",
[(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE]>;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaquemem:$dst),
"xrstor64\t$dst",
[(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVE, In64BitMode]>;
def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt\t$dst",
[(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT]>;
def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaquemem:$dst),
"xsaveopt64\t$dst",
[(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, PS, Requires<[HasXSAVEOPT, In64BitMode]>;
def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec\t$dst",
[(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC]>;
def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaquemem:$dst),
"xsavec64\t$dst",
[(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVEC, In64BitMode]>;
def XSAVES : I<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves\t$dst",
[(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaquemem:$dst),
"xsaves64\t$dst",
[(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors\t$dst",
[(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES]>;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
"xrstors64\t$dst",
[(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[HasXSAVES, In64BitMode]>;
} // Uses
} // SchedRW
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
def : InstAlias<"xstorerng", (XSTORE)>;
let SchedRW = [WriteSystem] in {
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
def XCRYPTECB : I<0xa7, MRM_C8, (outs), (ins), "xcryptecb", []>, TB;
def XCRYPTCBC : I<0xa7, MRM_D0, (outs), (ins), "xcryptcbc", []>, TB;
def XCRYPTCTR : I<0xa7, MRM_D8, (outs), (ins), "xcryptctr", []>, TB;
def XCRYPTCFB : I<0xa7, MRM_E0, (outs), (ins), "xcryptcfb", []>, TB;
def XCRYPTOFB : I<0xa7, MRM_E8, (outs), (ins), "xcryptofb", []>, TB;
}
let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
def XSHA1 : I<0xa6, MRM_C8, (outs), (ins), "xsha1", []>, TB;
def XSHA256 : I<0xa6, MRM_D0, (outs), (ins), "xsha256", []>, TB;
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
} // SchedRW
/*
//==-----------------------------------------------------------------------===//
// PKU - enable protection key
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def WRPKRU : PseudoI<(outs), (ins GR32:$src),
[(int_x86_wrpkru GR32:$src)]>;
def RDPKRU : PseudoI<(outs GR32:$dst), (ins),
[(set GR32:$dst, (int_x86_rdpkru))]>;
}
*/
let SchedRW = [WriteSystem] in {
let Defs = [EAX, EDX], Uses = [ECX] in
def RDPKRUr : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
let Uses = [EAX, ECX, EDX] in
def WRPKRUr : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
let Predicates = [HasFSGSBase, In64BitMode], SchedRW = [WriteSystem] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
"rdfsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdfsbase_32))]>, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
"rdfsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdfsbase_64))]>, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
"rdgsbase{l}\t$dst",
[(set GR32:$dst, (int_x86_rdgsbase_32))]>, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
"rdgsbase{q}\t$dst",
[(set GR64:$dst, (int_x86_rdgsbase_64))]>, XS;
def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
"wrfsbase{l}\t$src",
[(int_x86_wrfsbase_32 GR32:$src)]>, XS;
def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
"wrfsbase{q}\t$src",
[(int_x86_wrfsbase_64 GR64:$src)]>, XS;
def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
"wrgsbase{l}\t$src",
[(int_x86_wrgsbase_32 GR32:$src)]>, XS;
def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
"wrgsbase{q}\t$src",
[(int_x86_wrgsbase_64 GR64:$src)]>, XS;
}
//===----------------------------------------------------------------------===//
// INVPCID Instruction
let SchedRW = [WriteSystem] in {
def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}",
[(int_x86_invpcid GR32:$src1, addr:$src2)]>, T8PD,
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode, HasINVPCID]>;
} // SchedRW
let Predicates = [In64BitMode, HasINVPCID] in {
// The instruction can only use a 64-bit register as the register argument
// in 64-bit mode, while the intrinsic only accepts a 32-bit argument
// corresponding to it.
// The accepted values for now are 0, 1, 2, 3 anyway (see Intel SDM -- INVPCID
// type), so it doesn't hurt us that one can't supply a 64-bit value here.
def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
(INVPCID64
(SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
addr:$src2)>;
}
//===----------------------------------------------------------------------===//
// SMAP Instruction
let Defs = [EFLAGS], SchedRW = [WriteSystem] in {
def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
}
//===----------------------------------------------------------------------===//
// SMX Instruction
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in {
def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB;
} // Uses, Defs
} // SchedRW
//===----------------------------------------------------------------------===//
// TS flag control instruction.
let SchedRW = [WriteSystem] in {
def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", []>, TB;
}
//===----------------------------------------------------------------------===//
// IF (inside EFLAGS) management instructions.
let SchedRW = [WriteSystem], Uses = [EFLAGS], Defs = [EFLAGS] in {
def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", []>;
def STI : I<0xFB, RawFrm, (outs), (ins), "sti", []>;
}
//===----------------------------------------------------------------------===//
// RDPID Instruction
let SchedRW = [WriteSystem] in {
def RDPID32 : I<0xC7, MRM7r, (outs GR32:$dst), (ins),
"rdpid\t$dst", [(set GR32:$dst, (int_x86_rdpid))]>, XS,
Requires<[Not64BitMode, HasRDPID]>;
def RDPID64 : I<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdpid\t$dst", []>, XS,
Requires<[In64BitMode, HasRDPID]>;
} // SchedRW
let Predicates = [In64BitMode, HasRDPID] in {
// Due to silly instruction definition, we have to compensate for the
// instruction outputting a 64-bit register.
def : Pat<(int_x86_rdpid),
(EXTRACT_SUBREG (RDPID64), sub_32bit)>;
}
//===----------------------------------------------------------------------===//
// PTWRITE Instruction - Write Data to a Processor Trace Packet
let SchedRW = [WriteSystem] in {
def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 (loadi32 addr:$dst))]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 (loadi64 addr:$dst))]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst),
"ptwrite{l}\t$dst", [(int_x86_ptwrite32 GR32:$dst)]>, XS,
Requires<[HasPTWRITE]>;
def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst),
"ptwrite{q}\t$dst", [(int_x86_ptwrite64 GR64:$dst)]>, XS,
Requires<[In64BitMode, HasPTWRITE]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Platform Configuration instruction
// From ISA docs:
// "This instruction is used to execute functions for configuring platform
// features.
// EAX: Leaf function to be invoked.
// RBX/RCX/RDX: Leaf-specific purpose."
// "Successful execution of the leaf clears RAX (set to zero) and ZF, CF, PF,
// AF, OF, and SF are cleared. In case of failure, the failure reason is
// indicated in RAX with ZF set to 1 and CF, PF, AF, OF, and SF are cleared."
// Thus all these mentioned registers are considered clobbered.
let SchedRW = [WriteSystem] in {
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX, RDX, EFLAGS] in
def PCONFIG : I<0x01, MRM_C5, (outs), (ins), "pconfig", []>, TB,
Requires<[HasPCONFIG]>;
} // SchedRW


@@ -0,0 +1,60 @@
//===-- X86InstrTSX.td - TSX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel TSX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// TSX instructions
def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPSideEffect]>;
let SchedRW = [WriteSystem] in {
//let usesCustomInserter = 1 in
//def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
// "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>,
// Requires<[HasRTM]>;
let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
"xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
"xbegin\t$dst", []>, OpSize32;
}
// Pseudo instruction to fake the definition of EAX on the fallback code path.
//let isPseudo = 1, Defs = [EAX] in {
//def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>;
//}
def XEND : I<0x01, MRM_D5, (outs), (ins),
"xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>;
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
"xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW
// HLE prefixes
let SchedRW = [WriteSystem] in {
let isAsmParserOnly = 1 in {
def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
}
} // SchedRW


@@ -0,0 +1,88 @@
//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
// set.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// VMX instructions
let SchedRW = [WriteSystem] in {
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8PD,
Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, PD;
// 0F 01 D4
def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB;
def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, PS;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayStore
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
NotMemoryFoldable;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
NotMemoryFoldable;
} // mayLoad
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
"vmxon\t$vmxon", []>, XS;
} // SchedRW


@@ -0,0 +1,511 @@
//===- X86InstrVecCompiler.td - Vector Compiler Patterns ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the various vector pseudo instructions used by the
// compiler, as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// No op bitconverts
//===----------------------------------------------------------------------===//
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//
// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(COPY_TO_REGCLASS FR64:$src, VR128)>;
//===----------------------------------------------------------------------===//
// Subvector tricks
//===----------------------------------------------------------------------===//
// Patterns for insert_subvector/extract_subvector to/from index=0
multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
RegisterClass RC, ValueType VT,
SubRegIndex subIdx> {
def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
(subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
(VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
}
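// For illustration, a rough hand-expansion of one instantiation below,
// subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm> (a sketch
// only, written out by substituting the multiclass parameters):
//
//   def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
//             (v4i32 (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
//   def : Pat<(v8i32 (insert_subvector undef, VR128:$src, (iPTR 0))),
//             (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128:$src, sub_xmm))>;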
// A 128-bit subvector extract from the first 256-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 256-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR256, v8i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR256, v8f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR128, v4i32, VR512, v16i32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v4f32, VR512, v16f32, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
// A 256-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 256-bit subvector
// insert to the first 512-bit vector position is a subregister copy that needs
// no instruction.
defm : subvector_subreg_lowering<VR256, v8i32, VR512, v16i32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v8f32, VR512, v16f32, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
multiclass subvector_store_lowering<string AlignedStr, string UnalignedStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(alignedstore (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#AlignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
def : Pat<(store (DstTy (extract_subvector
(SrcTy RC:$src), (iPTR 0))), addr:$dst),
(!cast<Instruction>("VMOV"#UnalignedStr#"mr") addr:$dst,
(DstTy (EXTRACT_SUBREG RC:$src, SubIdx)))>;
}
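// For illustration, a rough hand-expansion of one instantiation below,
// subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>,
// where the !cast selects VMOVAPSmr / VMOVUPSmr (a sketch only):
//
//   def : Pat<(alignedstore (v4f32 (extract_subvector (v8f32 VR256X:$src),
//                                                     (iPTR 0))), addr:$dst),
//             (VMOVAPSmr addr:$dst,
//                        (v4f32 (EXTRACT_SUBREG VR256X:$src, sub_xmm)))>;
//   def : Pat<(store (v4f32 (extract_subvector (v8f32 VR256X:$src),
//                                              (iPTR 0))), addr:$dst),
//             (VMOVUPSmr addr:$dst,
//                        (v4f32 (EXTRACT_SUBREG VR256X:$src, sub_xmm)))>;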
let Predicates = [HasAVX, NoVLX] in {
defm : subvector_store_lowering<"APD", "UPD", VR256X, v2f64, v4f64, sub_xmm>;
defm : subvector_store_lowering<"APS", "UPS", VR256X, v4f32, v8f32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v2i64, v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v4i32, v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v8i16, v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA", "DQU", VR256X, v16i8, v32i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of the lower 128 bits.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR256X, v2f64, v4f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR256X, v4f32, v8f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v2i64,
v4i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v4i32,
v8i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v8i16,
v16i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR256X, v16i8,
v32i8, sub_xmm>;
// Special patterns for storing subvector extracts of the lower 128 bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
defm : subvector_store_lowering<"APDZ128", "UPDZ128", VR512, v2f64, v8f64,
sub_xmm>;
defm : subvector_store_lowering<"APSZ128", "UPSZ128", VR512, v4f32, v16f32,
sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v2i64,
v8i64, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v4i32,
v16i32, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v8i16,
v32i16, sub_xmm>;
defm : subvector_store_lowering<"DQA64Z128", "DQU64Z128", VR512, v16i8,
v64i8, sub_xmm>;
// Special patterns for storing subvector extracts of the lower 256 bits of a
// 512-bit vector. It's cheaper to just use VMOVAPS/VMOVUPS instead of
// VEXTRACTF128mr.
defm : subvector_store_lowering<"APDZ256", "UPDZ256", VR512, v4f64, v8f64,
sub_ymm>;
defm : subvector_store_lowering<"APSZ256", "UPSZ256", VR512, v8f32, v16f32,
sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v4i64,
v8i64, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v8i32,
v16i32, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v16i16,
v32i16, sub_ymm>;
defm : subvector_store_lowering<"DQA64Z256", "DQU64Z256", VR512, v32i8,
v64i8, sub_ymm>;
}
// If we're inserting into an all-zeros vector, just use a plain move, which
// will zero the upper bits. A post-isel hook will take care of removing
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
ValueType SrcTy, ValueType ZeroTy,
SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
(SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
}
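// Illustrative expansion (a sketch, derived from the multiclass above): the
// <"APD", VR128, v4f64, v2f64, v8i32, sub_xmm> instantiation below becomes
//
//   def : Pat<(v4f64 (insert_subvector (bitconvert (v8i32 immAllZerosV)),
//                                      (v2f64 VR128:$src), (iPTR 0))),
//             (SUBREG_TO_REG (i64 0),
//                 (v2f64 (VMOVAPDrr VR128:$src)), sub_xmm)>;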
let Predicates = [HasAVX, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
}
let Predicates = [HasVLX] in {
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :
PatLeaf<(vt RC:$src), [{
return isMaskZeroExtended(N);
}]>;
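// Note (an assumption about the C++ side): isMaskZeroExtended is a helper in
// the X86 instruction selector that returns true when the node producing the
// mask value is already known to zero the upper bits of the mask register.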
def maskzeroupperv1i1 : maskzeroupper<v1i1, VK1>;
def maskzeroupperv2i1 : maskzeroupper<v2i1, VK2>;
def maskzeroupperv4i1 : maskzeroupper<v4i1, VK4>;
def maskzeroupperv8i1 : maskzeroupper<v8i1, VK8>;
def maskzeroupperv16i1 : maskzeroupper<v16i1, VK16>;
def maskzeroupperv32i1 : maskzeroupper<v32i1, VK32>;
// These patterns determine whether we can rely on the upper bits of a mask
// register being zeroed by the previous operation, so that the explicit
// zeroing can be skipped.
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv16i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv32i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK32:$src, VK64)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv8i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK8:$src, VK16)>;
}
let Predicates = [HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv1i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK1:$src, VK8)>;
}
let Predicates = [HasVLX, HasDQI] in {
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK8)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK8)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK16)>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK16)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK32)>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv2i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK2:$src, VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
maskzeroupperv4i1:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK4:$src, VK64)>;
}
// If the upper bits are not known to be zero, we have to fall back to
// explicit zeroing using shifts.
let Predicates = [HasAVX512] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK1:$mask, VK16),
(i8 15)), (i8 15))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16),
(i8 14)), (i8 14))>;
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16),
(i8 12)), (i8 12))>;
}
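// Worked example for the shift amounts above: for the v4i1-into-v16i1 case,
// the amount is 16 - 4 = 12; shifting the 16-bit mask left by 12 and then
// right by 12 clears bits 4..15, keeping only the low four mask bits.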
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
(i8 8)), (i8 8))>;
}
let Predicates = [HasDQI] in {
def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK1:$mask, VK8),
(i8 7)), (i8 7))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8),
(i8 6)), (i8 6))>;
def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK4:$mask, VK8),
(i8 4)), (i8 4))>;
}
let Predicates = [HasBWI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v16i1 VK16:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v32i1 VK32:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
}
let Predicates = [HasBWI, NoDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
(i8 24)), (i8 24))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
(i8 56)), (i8 56))>;
}
let Predicates = [HasBWI, HasDQI] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v8i1 VK8:$mask), (iPTR 0))),
(COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
}
let Predicates = [HasBWI, HasVLX] in {
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK1:$mask, VK32),
(i8 31)), (i8 31))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK2:$mask, VK32),
(i8 30)), (i8 30))>;
def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK4:$mask, VK32),
(i8 28)), (i8 28))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v1i1 VK1:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK1:$mask, VK64),
(i8 63)), (i8 63))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v2i1 VK2:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK2:$mask, VK64),
(i8 62)), (i8 62))>;
def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
(v4i1 VK4:$mask), (iPTR 0))),
(KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK4:$mask, VK64),
(i8 60)), (i8 60))>;
}


@@ -0,0 +1,446 @@
//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the XOP (eXtended OPerations) instruction set.
//
//===----------------------------------------------------------------------===//
multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[SchedWritePHAdd.XMM]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>;
}
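// Illustrative note: each defm below yields a register form (e.g. VPHSUBWDrr)
// and a memory form (e.g. VPHSUBWDrm) that folds a 128-bit load into the
// operation.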
let ExeDomain = SSEPackedInt in {
defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, loadv2i64>;
defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, loadv2i64>;
defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, loadv2i64>;
defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, loadv2i64>;
defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, loadv2i64>;
defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, loadv2i64>;
defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, loadv2i64>;
defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, loadv2i64>;
defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, loadv2i64>;
defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, loadv2i64>;
defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, loadv2i64>;
defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, loadv2i64>;
defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, loadv2i64>;
defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, loadv2i64>;
defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, loadv2i64>;
}
// Two-operand instructions whose memory form takes a scalar load.
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
Operand memop, ComplexPattern mem_cpat,
X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int VR128:$src))]>, XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP,
Sched<[sched.Folded, ReadAfterLd]>;
}
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop, X86FoldableSchedWrite sched> {
def Yrr : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int VR256:$src))]>, XOP, VEX_L, Sched<[sched]>;
def Yrm : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, XOP, VEX_L,
Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedSingle in {
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
ssmem, sse_load_f32, SchedWriteFRnd.Scl>;
defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, loadv4f32,
SchedWriteFRnd.XMM>;
defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, loadv8f32,
SchedWriteFRnd.YMM>;
}
let ExeDomain = SSEPackedDouble in {
defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
sdmem, sse_load_f64, SchedWriteFRnd.Scl>;
defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, loadv2f64,
SchedWriteFRnd.XMM>;
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64,
SchedWriteFRnd.YMM>;
}
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
XOP, Sched<[sched]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
(vt128 VR128:$src2))))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3op<0x90, "vprotb", rotl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPROTD : xop3op<0x92, "vprotd", rotl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPROTQ : xop3op<0x93, "vprotq", rotl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPROTW : xop3op<0x91, "vprotw", rotl, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16, SchedWriteVarVecShift.XMM>;
defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8, SchedWriteVarVecShift.XMM>;
defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32, SchedWriteVarVecShift.XMM>;
defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64, SchedWriteVarVecShift.XMM>;
defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16, SchedWriteVarVecShift.XMM>;
}
multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>,
XOP, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPROTB : xop3opimm<0xC0, "vprotb", X86vrotli, v16i8,
SchedWriteVecShiftImm.XMM>;
defm VPROTD : xop3opimm<0xC2, "vprotd", X86vrotli, v4i32,
SchedWriteVecShiftImm.XMM>;
defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vrotli, v2i64,
SchedWriteVecShiftImm.XMM>;
defm VPROTW : xop3opimm<0xC1, "vprotw", X86vrotli, v8i16,
SchedWriteVecShiftImm.XMM>;
}
// Instructions where the second source can be memory, but the third must be
// a register.
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int,
X86FoldableSchedWrite sched> {
let isCommutable = 1 in
def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V,
Sched<[sched]>;
def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd",
int_x86_xop_vpmadcswd, SchedWriteVecIMul.XMM>;
defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd",
int_x86_xop_vpmadcsswd, SchedWriteVecIMul.XMM>;
defm VPMACSWW : xop4opm2<0x95, "vpmacsww",
int_x86_xop_vpmacsww, SchedWriteVecIMul.XMM>;
defm VPMACSWD : xop4opm2<0x96, "vpmacswd",
int_x86_xop_vpmacswd, SchedWriteVecIMul.XMM>;
defm VPMACSSWW : xop4opm2<0x85, "vpmacssww",
int_x86_xop_vpmacssww, SchedWriteVecIMul.XMM>;
defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd",
int_x86_xop_vpmacsswd, SchedWriteVecIMul.XMM>;
defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql",
int_x86_xop_vpmacssdql, SchedWritePMULLD.XMM>;
defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh",
int_x86_xop_vpmacssdqh, SchedWritePMULLD.XMM>;
defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd",
int_x86_xop_vpmacssdd, SchedWritePMULLD.XMM>;
defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql",
int_x86_xop_vpmacsdql, SchedWritePMULLD.XMM>;
defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh",
int_x86_xop_vpmacsdqh, SchedWritePMULLD.XMM>;
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd",
int_x86_xop_vpmacsdd, SchedWritePMULLD.XMM>;
}
// IFMA patterns - for cases where we can safely ignore the overflow bits from
// the multiply or easily match with existing intrinsics.
let Predicates = [HasXOP] in {
def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v8i16 VR128:$src3))),
(VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (bc_v2i64 (X86PShufd (v4i32 VR128:$src1), (i8 -11))),
(bc_v2i64 (X86PShufd (v4i32 VR128:$src2), (i8 -11)))),
(v2i64 VR128:$src3))),
(VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v2i64 (add (X86pmuldq (v2i64 VR128:$src1), (v2i64 VR128:$src2)),
(v2i64 VR128:$src3))),
(VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
(v4i32 VR128:$src3))),
(VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
}
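// Worked note (consistent with the comment above): the v8i16 pattern is sound
// because vpmacsww keeps only the low 16 bits of each 16x16 product before
// the add, matching the wrap-around semantics of the ISD mul it replaces.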
// Transform to swizzle the immediate, which helps match a memory operand in
// the first source operand.
def CommuteVPCOMCC : SDNodeXForm<imm, [{
uint8_t Imm = N->getZExtValue() & 0x7;
Imm = X86::getSwappedVPCOMImm(Imm);
return getI8Imm(Imm, SDLoc(N));
}]>;
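// Illustrative example (assuming the usual VPCOM condition encoding):
// swapping the operands maps LT (0) <-> GT (2) and LE (1) <-> GE (3), while
// EQ (4), NEQ (5), FALSE (6) and TRUE (7) map to themselves.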
// Instructions where the second source can be memory and the third must be
// an imm8.
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
X86FoldableSchedWrite sched> {
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
imm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))),
imm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
NotMemoryFoldable;
}
}
def : Pat<(OpNode (bitconvert (loadv2i64 addr:$src2)),
(vt128 VR128:$src1), imm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
}
defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64, SchedWriteVecALU.XMM>;
defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8, SchedWriteVecALU.XMM>;
defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16, SchedWriteVecALU.XMM>;
defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32, SchedWriteVecALU.XMM>;
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64, SchedWriteVecALU.XMM>;
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128, X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// 128mem:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// VR128:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8,
SchedWriteVarShuffle.XMM>;
}
// Instructions where either the second or the third source can be memory.
multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
X86FoldableSchedWrite sched> {
def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, RC:$src2))))]>, XOP_4V,
Sched<[sched]>;
def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
(X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
(X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V, Sched<[sched.Folded, ReadAfterLd,
// x86memop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault,
ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
}
let ExeDomain = SSEPackedInt in {
defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64,
SchedWriteShuffle.XMM>;
defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64,
SchedWriteShuffle.YMM>, VEX_L;
}
multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
X86MemOperand intmemop, X86MemOperand fpmemop,
ValueType VT, PatFrag FPLdFrag, PatFrag IntLdFrag,
X86FoldableSchedWrite sched> {
def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2,
(bitconvert (IntLdFrag addr:$src3)),
(i8 imm:$src4))))]>, VEX_W,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
Sched<[sched.Folded, ReadAfterLd,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// RC:$src3
ReadAfterLd]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
v2f64, loadv2f64, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
v4f64, loadv4f64, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}
let ExeDomain = SSEPackedSingle in {
defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
v4f32, loadv4f32, loadv2i64,
SchedWriteFVarShuffle.XMM>;
defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
v8f32, loadv8f32, loadv4i64,
SchedWriteFVarShuffle.YMM>, VEX_L;
}


@@ -0,0 +1,77 @@
//===-- X86PfmCounters.td - X86 Hardware Counters ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the available hardware counters for various subtargets.
//
//===----------------------------------------------------------------------===//
let SchedModel = SandyBridgeModel in {
def SBCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SBPort0Counter : PfmIssueCounter<SBPort0, ["uops_dispatched_port:port_0"]>;
def SBPort1Counter : PfmIssueCounter<SBPort1, ["uops_dispatched_port:port_1"]>;
def SBPort23Counter : PfmIssueCounter<SBPort23,
["uops_dispatched_port:port_2",
"uops_dispatched_port:port_3"]>;
def SBPort4Counter : PfmIssueCounter<SBPort4, ["uops_dispatched_port:port_4"]>;
def SBPort5Counter : PfmIssueCounter<SBPort5, ["uops_dispatched_port:port_5"]>;
}
let SchedModel = HaswellModel in {
def HWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def HWPort0Counter : PfmIssueCounter<HWPort0, ["uops_dispatched_port:port_0"]>;
def HWPort1Counter : PfmIssueCounter<HWPort1, ["uops_dispatched_port:port_1"]>;
def HWPort2Counter : PfmIssueCounter<HWPort2, ["uops_dispatched_port:port_2"]>;
def HWPort3Counter : PfmIssueCounter<HWPort3, ["uops_dispatched_port:port_3"]>;
def HWPort4Counter : PfmIssueCounter<HWPort4, ["uops_dispatched_port:port_4"]>;
def HWPort5Counter : PfmIssueCounter<HWPort5, ["uops_dispatched_port:port_5"]>;
def HWPort6Counter : PfmIssueCounter<HWPort6, ["uops_dispatched_port:port_6"]>;
def HWPort7Counter : PfmIssueCounter<HWPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BroadwellModel in {
def BWCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def BWPort0Counter : PfmIssueCounter<BWPort0, ["uops_executed_port:port_0"]>;
def BWPort1Counter : PfmIssueCounter<BWPort1, ["uops_executed_port:port_1"]>;
def BWPort2Counter : PfmIssueCounter<BWPort2, ["uops_executed_port:port_2"]>;
def BWPort3Counter : PfmIssueCounter<BWPort3, ["uops_executed_port:port_3"]>;
def BWPort4Counter : PfmIssueCounter<BWPort4, ["uops_executed_port:port_4"]>;
def BWPort5Counter : PfmIssueCounter<BWPort5, ["uops_executed_port:port_5"]>;
def BWPort6Counter : PfmIssueCounter<BWPort6, ["uops_executed_port:port_6"]>;
def BWPort7Counter : PfmIssueCounter<BWPort7, ["uops_executed_port:port_7"]>;
}
let SchedModel = SkylakeClientModel in {
def SKLCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKLPort0Counter : PfmIssueCounter<SKLPort0, ["uops_dispatched_port:port_0"]>;
def SKLPort1Counter : PfmIssueCounter<SKLPort1, ["uops_dispatched_port:port_1"]>;
def SKLPort2Counter : PfmIssueCounter<SKLPort2, ["uops_dispatched_port:port_2"]>;
def SKLPort3Counter : PfmIssueCounter<SKLPort3, ["uops_dispatched_port:port_3"]>;
def SKLPort4Counter : PfmIssueCounter<SKLPort4, ["uops_dispatched_port:port_4"]>;
def SKLPort5Counter : PfmIssueCounter<SKLPort5, ["uops_dispatched_port:port_5"]>;
def SKLPort6Counter : PfmIssueCounter<SKLPort6, ["uops_dispatched_port:port_6"]>;
def SKLPort7Counter : PfmIssueCounter<SKLPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = SkylakeServerModel in {
def SKXCycleCounter : PfmCycleCounter<"unhalted_core_cycles">;
def SKXPort0Counter : PfmIssueCounter<SKXPort0, ["uops_dispatched_port:port_0"]>;
def SKXPort1Counter : PfmIssueCounter<SKXPort1, ["uops_dispatched_port:port_1"]>;
def SKXPort2Counter : PfmIssueCounter<SKXPort2, ["uops_dispatched_port:port_2"]>;
def SKXPort3Counter : PfmIssueCounter<SKXPort3, ["uops_dispatched_port:port_3"]>;
def SKXPort4Counter : PfmIssueCounter<SKXPort4, ["uops_dispatched_port:port_4"]>;
def SKXPort5Counter : PfmIssueCounter<SKXPort5, ["uops_dispatched_port:port_5"]>;
def SKXPort6Counter : PfmIssueCounter<SKXPort6, ["uops_dispatched_port:port_6"]>;
def SKXPort7Counter : PfmIssueCounter<SKXPort7, ["uops_dispatched_port:port_7"]>;
}
let SchedModel = BtVer2Model in {
def JCycleCounter : PfmCycleCounter<"cpu_clk_unhalted">;
def JFPU0Counter : PfmIssueCounter<JFPU0, ["dispatched_fpu:pipe0"]>;
def JFPU1Counter : PfmIssueCounter<JFPU1, ["dispatched_fpu:pipe1"]>;
}
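// Note (assumption): the counter names above are libpfm event names; they are
// consumed by tools such as llvm-exegesis to measure per-port uop dispatch
// when validating these scheduling models.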


@@ -0,0 +1,17 @@
//=- X86RegisterBank.td - Describe the X86 Banks ---------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the register banks available on X86.
//
//===----------------------------------------------------------------------===//
/// General Purpose Registers: RAX, RCX,...
def GPRRegBank : RegisterBank<"GPR", [GR64]>;
/// Floating Point/Vector Registers
def VECRRegBank : RegisterBank<"VECR", [VR512]>;


@@ -0,0 +1,591 @@
//===- X86RegisterInfo.td - Describe the X86 Register File --*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 Register file, defining the registers themselves,
// aliases between the registers, and the register classes built out of the
// registers.
//
//===----------------------------------------------------------------------===//
class X86Reg<string n, bits<16> Enc, list<Register> subregs = []> : Register<n> {
let Namespace = "X86";
let HWEncoding = Enc;
let SubRegs = subregs;
}
// Subregister indices.
let Namespace = "X86" in {
def sub_8bit : SubRegIndex<8>;
def sub_8bit_hi : SubRegIndex<8, 8>;
def sub_8bit_hi_phony : SubRegIndex<8, 8>;
def sub_16bit : SubRegIndex<16>;
def sub_16bit_hi : SubRegIndex<16, 16>;
def sub_32bit : SubRegIndex<32>;
def sub_xmm : SubRegIndex<128>;
def sub_ymm : SubRegIndex<256>;
}
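// Illustrative example: with these indices, AX decomposes into
// sub_8bit = AL (bits 0-7) and sub_8bit_hi = AH (bits 8-15); EAX reaches AX
// via sub_16bit; and ZMM0 reaches YMM0 via sub_ymm, which in turn reaches
// XMM0 via sub_xmm.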
//===----------------------------------------------------------------------===//
// Register definitions...
//
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
// AL aliases AX if we tell it that AX aliased AL (for example).
// Dwarf numbering is different for 32-bit and 64-bit, and there are
// variations by target as well. Currently the first entry is for X86-64, the
// second is for exception handling (EH) on X86-32/Darwin, and the third is
// the 'generic' one (X86-32/Linux and debug information on X86-32/Darwin).
// 8-bit registers
// Low registers
def AL : X86Reg<"al", 0>;
def DL : X86Reg<"dl", 2>;
def CL : X86Reg<"cl", 1>;
def BL : X86Reg<"bl", 3>;
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
def AH : X86Reg<"ah", 4>;
def DH : X86Reg<"dh", 6>;
def CH : X86Reg<"ch", 5>;
def BH : X86Reg<"bh", 7>;
// X86-64 only, requires REX.
let CostPerUse = 1 in {
def SIL : X86Reg<"sil", 6>;
def DIL : X86Reg<"dil", 7>;
def BPL : X86Reg<"bpl", 5>;
def SPL : X86Reg<"spl", 4>;
def R8B : X86Reg<"r8b", 8>;
def R9B : X86Reg<"r9b", 9>;
def R10B : X86Reg<"r10b", 10>;
def R11B : X86Reg<"r11b", 11>;
def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
}
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
def SIH : X86Reg<"", -1>;
def DIH : X86Reg<"", -1>;
def BPH : X86Reg<"", -1>;
def SPH : X86Reg<"", -1>;
def R8BH : X86Reg<"", -1>;
def R9BH : X86Reg<"", -1>;
def R10BH : X86Reg<"", -1>;
def R11BH : X86Reg<"", -1>;
def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
def HCX : X86Reg<"", -1>;
def HBX : X86Reg<"", -1>;
def HSI : X86Reg<"", -1>;
def HDI : X86Reg<"", -1>;
def HBP : X86Reg<"", -1>;
def HSP : X86Reg<"", -1>;
def HIP : X86Reg<"", -1>;
def R8WH : X86Reg<"", -1>;
def R9WH : X86Reg<"", -1>;
def R10WH : X86Reg<"", -1>;
def R11WH : X86Reg<"", -1>;
def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
}
// 16-bit registers
let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : X86Reg<"ax", 0, [AL,AH]>;
def DX : X86Reg<"dx", 2, [DL,DH]>;
def CX : X86Reg<"cx", 1, [CL,CH]>;
def BX : X86Reg<"bx", 3, [BL,BH]>;
}
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
def SI : X86Reg<"si", 6, [SIL,SIH]>;
def DI : X86Reg<"di", 7, [DIL,DIH]>;
def BP : X86Reg<"bp", 5, [BPL,BPH]>;
def SP : X86Reg<"sp", 4, [SPL,SPH]>;
}
def IP : X86Reg<"ip", 0>;
// X86-64 only, requires REX.
let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8W : X86Reg<"r8w", 8, [R8B,R8BH]>;
def R9W : X86Reg<"r9w", 9, [R9B,R9BH]>;
def R10W : X86Reg<"r10w", 10, [R10B,R10BH]>;
def R11W : X86Reg<"r11w", 11, [R11B,R11BH]>;
def R12W : X86Reg<"r12w", 12, [R12B,R12BH]>;
def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
def EAX : X86Reg<"eax", 0, [AX, HAX]>, DwarfRegNum<[-2, 0, 0]>;
def EDX : X86Reg<"edx", 2, [DX, HDX]>, DwarfRegNum<[-2, 2, 2]>;
def ECX : X86Reg<"ecx", 1, [CX, HCX]>, DwarfRegNum<[-2, 1, 1]>;
def EBX : X86Reg<"ebx", 3, [BX, HBX]>, DwarfRegNum<[-2, 3, 3]>;
def ESI : X86Reg<"esi", 6, [SI, HSI]>, DwarfRegNum<[-2, 6, 6]>;
def EDI : X86Reg<"edi", 7, [DI, HDI]>, DwarfRegNum<[-2, 7, 7]>;
def EBP : X86Reg<"ebp", 5, [BP, HBP]>, DwarfRegNum<[-2, 4, 5]>;
def ESP : X86Reg<"esp", 4, [SP, HSP]>, DwarfRegNum<[-2, 5, 4]>;
def EIP : X86Reg<"eip", 0, [IP, HIP]>, DwarfRegNum<[-2, 8, 8]>;
}
// X86-64 only, requires REX
let SubRegIndices = [sub_16bit, sub_16bit_hi], CostPerUse = 1,
CoveredBySubRegs = 1 in {
def R8D : X86Reg<"r8d", 8, [R8W,R8WH]>;
def R9D : X86Reg<"r9d", 9, [R9W,R9WH]>;
def R10D : X86Reg<"r10d", 10, [R10W,R10WH]>;
def R11D : X86Reg<"r11d", 11, [R11W,R11WH]>;
def R12D : X86Reg<"r12d", 12, [R12W,R12WH]>;
def R13D : X86Reg<"r13d", 13, [R13W,R13WH]>;
def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : X86Reg<"rdx", 2, [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : X86Reg<"rcx", 1, [ECX]>, DwarfRegNum<[2, -2, -2]>;
def RBX : X86Reg<"rbx", 3, [EBX]>, DwarfRegNum<[3, -2, -2]>;
def RSI : X86Reg<"rsi", 6, [ESI]>, DwarfRegNum<[4, -2, -2]>;
def RDI : X86Reg<"rdi", 7, [EDI]>, DwarfRegNum<[5, -2, -2]>;
def RBP : X86Reg<"rbp", 5, [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : X86Reg<"rsp", 4, [ESP]>, DwarfRegNum<[7, -2, -2]>;
// These also require REX.
let CostPerUse = 1 in {
def R8 : X86Reg<"r8", 8, [R8D]>, DwarfRegNum<[ 8, -2, -2]>;
def R9 : X86Reg<"r9", 9, [R9D]>, DwarfRegNum<[ 9, -2, -2]>;
def R10 : X86Reg<"r10", 10, [R10D]>, DwarfRegNum<[10, -2, -2]>;
def R11 : X86Reg<"r11", 11, [R11D]>, DwarfRegNum<[11, -2, -2]>;
def R12 : X86Reg<"r12", 12, [R12D]>, DwarfRegNum<[12, -2, -2]>;
def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
}}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : X86Reg<"mm0", 0>, DwarfRegNum<[41, 29, 29]>;
def MM1 : X86Reg<"mm1", 1>, DwarfRegNum<[42, 30, 30]>;
def MM2 : X86Reg<"mm2", 2>, DwarfRegNum<[43, 31, 31]>;
def MM3 : X86Reg<"mm3", 3>, DwarfRegNum<[44, 32, 32]>;
def MM4 : X86Reg<"mm4", 4>, DwarfRegNum<[45, 33, 33]>;
def MM5 : X86Reg<"mm5", 5>, DwarfRegNum<[46, 34, 34]>;
def MM6 : X86Reg<"mm6", 6>, DwarfRegNum<[47, 35, 35]>;
def MM7 : X86Reg<"mm7", 7>, DwarfRegNum<[48, 36, 36]>;
// Pseudo Floating Point registers
def FP0 : X86Reg<"fp0", 0>;
def FP1 : X86Reg<"fp1", 0>;
def FP2 : X86Reg<"fp2", 0>;
def FP3 : X86Reg<"fp3", 0>;
def FP4 : X86Reg<"fp4", 0>;
def FP5 : X86Reg<"fp5", 0>;
def FP6 : X86Reg<"fp6", 0>;
def FP7 : X86Reg<"fp7", 0>;
// XMM Registers, used by the various SSE instruction set extensions.
def XMM0: X86Reg<"xmm0", 0>, DwarfRegNum<[17, 21, 21]>;
def XMM1: X86Reg<"xmm1", 1>, DwarfRegNum<[18, 22, 22]>;
def XMM2: X86Reg<"xmm2", 2>, DwarfRegNum<[19, 23, 23]>;
def XMM3: X86Reg<"xmm3", 3>, DwarfRegNum<[20, 24, 24]>;
def XMM4: X86Reg<"xmm4", 4>, DwarfRegNum<[21, 25, 25]>;
def XMM5: X86Reg<"xmm5", 5>, DwarfRegNum<[22, 26, 26]>;
def XMM6: X86Reg<"xmm6", 6>, DwarfRegNum<[23, 27, 27]>;
def XMM7: X86Reg<"xmm7", 7>, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
let CostPerUse = 1 in {
def XMM8: X86Reg<"xmm8", 8>, DwarfRegNum<[25, -2, -2]>;
def XMM9: X86Reg<"xmm9", 9>, DwarfRegNum<[26, -2, -2]>;
def XMM10: X86Reg<"xmm10", 10>, DwarfRegNum<[27, -2, -2]>;
def XMM11: X86Reg<"xmm11", 11>, DwarfRegNum<[28, -2, -2]>;
def XMM12: X86Reg<"xmm12", 12>, DwarfRegNum<[29, -2, -2]>;
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse
// YMM0-15 registers, used by AVX instructions and
// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
foreach Index = 0-31 in {
def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// ZMM Registers, used by AVX-512 instructions.
let SubRegIndices = [sub_ymm] in {
foreach Index = 0-31 in {
def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
}
}
// Mask Registers, used by AVX-512 instructions.
def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, 93, 93]>;
def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, 94, 94]>;
def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, 95, 95]>;
def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, 96, 96]>;
def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, 97, 97]>;
def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, 98, 98]>;
def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, 99, 99]>;
def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, 100, 100]>;
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
// way both kinds can be live without exceeding the stack depth. ST registers
// are only live around inline assembly.
def ST0 : X86Reg<"st(0)", 0>, DwarfRegNum<[33, 12, 11]>;
def ST1 : X86Reg<"st(1)", 1>, DwarfRegNum<[34, 13, 12]>;
def ST2 : X86Reg<"st(2)", 2>, DwarfRegNum<[35, 14, 13]>;
def ST3 : X86Reg<"st(3)", 3>, DwarfRegNum<[36, 15, 14]>;
def ST4 : X86Reg<"st(4)", 4>, DwarfRegNum<[37, 16, 15]>;
def ST5 : X86Reg<"st(5)", 5>, DwarfRegNum<[38, 17, 16]>;
def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]>;
def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
// Floating-point status word
def FPSW : X86Reg<"fpsw", 0>;
// Status flags register.
//
// Note that some flags that are commonly thought of as part of the status
// flags register are modeled separately. Typically this is due to instructions
// reading and updating those flags independently of all the others. We don't
// want to create false dependencies between these instructions and so we use
// a separate register to model them.
def EFLAGS : X86Reg<"flags", 0>;
// The direction flag.
def DF : X86Reg<"dirflag", 0>;
// Segment registers
def CS : X86Reg<"cs", 1>;
def DS : X86Reg<"ds", 3>;
def SS : X86Reg<"ss", 2>;
def ES : X86Reg<"es", 0>;
def FS : X86Reg<"fs", 4>;
def GS : X86Reg<"gs", 5>;
// Debug registers
def DR0 : X86Reg<"dr0", 0>;
def DR1 : X86Reg<"dr1", 1>;
def DR2 : X86Reg<"dr2", 2>;
def DR3 : X86Reg<"dr3", 3>;
def DR4 : X86Reg<"dr4", 4>;
def DR5 : X86Reg<"dr5", 5>;
def DR6 : X86Reg<"dr6", 6>;
def DR7 : X86Reg<"dr7", 7>;
def DR8 : X86Reg<"dr8", 8>;
def DR9 : X86Reg<"dr9", 9>;
def DR10 : X86Reg<"dr10", 10>;
def DR11 : X86Reg<"dr11", 11>;
def DR12 : X86Reg<"dr12", 12>;
def DR13 : X86Reg<"dr13", 13>;
def DR14 : X86Reg<"dr14", 14>;
def DR15 : X86Reg<"dr15", 15>;
// Control registers
def CR0 : X86Reg<"cr0", 0>;
def CR1 : X86Reg<"cr1", 1>;
def CR2 : X86Reg<"cr2", 2>;
def CR3 : X86Reg<"cr3", 3>;
def CR4 : X86Reg<"cr4", 4>;
def CR5 : X86Reg<"cr5", 5>;
def CR6 : X86Reg<"cr6", 6>;
def CR7 : X86Reg<"cr7", 7>;
def CR8 : X86Reg<"cr8", 8>;
def CR9 : X86Reg<"cr9", 9>;
def CR10 : X86Reg<"cr10", 10>;
def CR11 : X86Reg<"cr11", 11>;
def CR12 : X86Reg<"cr12", 12>;
def CR13 : X86Reg<"cr13", 13>;
def CR14 : X86Reg<"cr14", 14>;
def CR15 : X86Reg<"cr15", 15>;
// Pseudo index registers
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
// Bound registers, used in MPX instructions
def BND0 : X86Reg<"bnd0", 0>;
def BND1 : X86Reg<"bnd1", 1>;
def BND2 : X86Reg<"bnd2", 2>;
def BND3 : X86Reg<"bnd3", 3>;
// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
//
// List call-clobbered registers before callee-save registers. RBX, RBP, (and
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-bit mode, there are 12 additional i8 registers: SIL, DIL, BPL, SPL,
// and R8B ... R15B.
// Allocate R12 and R13 last, as these require an extra byte when
// encoded in x86-64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
// 64-bit mode. The main complication is that they cannot be encoded in an
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
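// Illustrative note: the AltOrderSelect hook above switches GR8 allocation to
// the alternative order (without AH, BH, CH, DH) in 64-bit mode, where the
// h-registers cannot be encoded together with a REX prefix.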
let isAllocatable = 0 in
def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
R12BH, R13BH, R14BH, R15BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
(add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
R15WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
// Debug registers.
def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 15)>;
// Control registers.
def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d"
// registers. On x86-32, GR16_ABCD and GR32_ABCD are classes for registers
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>;
def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>;
def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>;
def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
R8, R9, R10, R11, RIP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH)> {
let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP)>;
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)>;
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
// GR64_NOSP - GR64 registers except RSP (and RIP).
def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)>;
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
(and GR32_NOREX, GR32_NOSP)>;
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86-64 ISA.
// In such cases, it is fine to use RIP as we are sure the 32 high
// bits are not set. We do not need variants for NOSP as RIP is not
// allowed there.
// RIP is not spilled anywhere for now, so stick to 32-bit alignment
// to save on memory space.
// FIXME: We could allow all 64bit registers, but we would need
// something to check that the 32 high bits are not set,
// which we do not have right now.
def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
// When RBP is used as a base pointer in a 32-bit address environment,
// it is also safe to use the full register to access addresses.
// Since RBP will never be spilled, stick to 32-bit alignment to save
// on memory consumption.
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
// values as 64-bit quantities instead of 80-bit quantities, which is much much
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32)>;
def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Special classes that help the assembly parser choose some alternate
// instructions to favor 2-byte VEX encodings.
def VR128L : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 0, 7)>;
def VR128H : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (sequence "XMM%u", 8, 15)>;
def VR256L : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 7)>;
def VR256H : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 8, 15)>;
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
// AVX-512 vector/mask registers.
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;}
def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;}
def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;}
def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;}
def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;}
def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;}
def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;}
def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;}
def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;}
def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,49 @@
//===-- X86SchedPredicates.td - X86 Scheduling Predicates --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines scheduling predicate definitions that are common to
// all X86 subtargets.
//
//===----------------------------------------------------------------------===//
// A predicate used to identify dependency-breaking instructions that clear the
// content of the destination register. Note that this predicate only checks
// whether the input registers are the same. It makes no assumptions about
// the expected instruction opcodes, because different processors may implement
// different zero-idioms.
def ZeroIdiomPredicate : CheckSameRegOperand<1, 2>;
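// For example (illustrative): "xorps %xmm0, %xmm0" satisfies this predicate,
// since operands 1 and 2 (the two sources) name the same register.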
// A predicate used to check if an instruction is a LEA, and if it uses all
// three source operands: base, index, and offset.
def IsThreeOperandsLEAPredicate : CheckAll<[
  CheckOpcode<[LEA32r, LEA64r, LEA64_32r, LEA16r]>,
  // isRegOperand(Base)
  CheckIsRegOperand<1>,
  CheckNot<CheckInvalidRegOperand<1>>,
  // isRegOperand(Index)
  CheckIsRegOperand<3>,
  CheckNot<CheckInvalidRegOperand<3>>,
  // hasLEAOffset(Offset)
  CheckAny<[
    CheckAll<[
      CheckIsImmOperand<4>,
      CheckNot<CheckZeroOperand<4>>
    ]>,
    CheckNonPortable<"MI.getOperand(4).isGlobal()">
  ]>
]>;
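// Illustrative examples: "leaq 4(%rdi,%rsi,2), %rax" satisfies this predicate
// (register base, register index, non-zero immediate offset), whereas
// "leaq (%rdi), %rax" does not, since it has neither an index nor an offset.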
// This predicate evaluates to true only if the input machine instruction is
// a three-operand LEA. Tablegen automatically generates a new method for it
// in X86GenInstrInfo.
def IsThreeOperandsLEAFn :
TIIPredicate<"X86", "isThreeOperandsLEA", IsThreeOperandsLEAPredicate>;
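// A sketch of the generated helper (the exact form emitted into
// X86GenInstrInfo.inc may differ):
//   bool isThreeOperandsLEA(const MachineInstr &MI);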

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,661 @@
//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// InstrSchedModel annotations for out-of-order CPUs.
// Instructions with folded loads need to read the memory operand immediately,
// but other register operands don't have to be read until the load is ready.
// These operands are marked with ReadAfterLd.
def ReadAfterLd : SchedRead;
// Instructions with both a load and a store folded are modeled as a folded
// load + WriteRMW.
def WriteRMW : SchedWrite;
// Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
multiclass X86WriteRes<SchedWrite SchedRW,
                       list<ProcResourceKind> ExePorts,
                       int Lat, list<int> Res, int UOps> {
  def : WriteRes<SchedRW, ExePorts> {
    let Latency = Lat;
    let ResourceCycles = Res;
    let NumMicroOps = UOps;
  }
}
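// Example use (illustrative; taken from the per-CPU models):
//   defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
// expands to a WriteRes for WriteBSWAP32 with Latency = 1,
// ResourceCycles = [1] and NumMicroOps = 1 on AtomPort0.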
// Most instructions can fold loads, so almost every SchedWrite comes in two
// variants: with and without a folded load.
// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
// with a folded load.
class X86FoldableSchedWrite : SchedWrite {
  // The SchedWrite to use when a load is folded into the instruction.
  SchedWrite Folded;
}
// Multiclass that produces a linked pair of SchedWrites.
multiclass X86SchedWritePair {
  // Register-Memory operation.
  def Ld : SchedWrite;
  // Register-Register operation.
  def NAME : X86FoldableSchedWrite {
    let Folded = !cast<SchedWrite>(NAME#"Ld");
  }
}
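// For example, "defm WriteALU : X86SchedWritePair;" (defined below) produces
// WriteALU for the register-register form and WriteALULd for the folded-load
// form, with WriteALU.Folded pointing at WriteALULd.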
// Helpers to mark SchedWrites as unsupported.
multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
  let Unsupported = 1 in {
    def : WriteRes<SchedRW, []>;
  }
}
multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
  let Unsupported = 1 in {
    def : WriteRes<SchedRW, []>;
    def : WriteRes<SchedRW.Folded, []>;
  }
}
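// Example (illustrative): a model for a CPU without FMA support can declare
//   defm : X86WriteResPairUnsupported<WriteFMA>;
// which marks both WriteFMA and its folded-load variant WriteFMALd as
// Unsupported for that processor.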
// Class that wraps X86FoldableSchedWrite for each vector width.
class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
                          X86FoldableSchedWrite s128,
                          X86FoldableSchedWrite s256,
                          X86FoldableSchedWrite s512> {
  X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
  X86FoldableSchedWrite MMX = sScl; // MMX operations.
  X86FoldableSchedWrite XMM = s128; // XMM operations.
  X86FoldableSchedWrite YMM = s256; // YMM operations.
  X86FoldableSchedWrite ZMM = s512; // ZMM operations.
}
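// Instruction definitions can then select the write matching their vector
// width, e.g. (illustrative) Sched<[SchedWriteFAdd.XMM]> on a 128-bit add,
// using the SchedWriteFAdd wrapper defined at the end of this file.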
// Class that wraps X86SchedWriteWidths for each fp vector type.
class X86SchedWriteSizes<X86SchedWriteWidths sPS,
                         X86SchedWriteWidths sPD> {
  X86SchedWriteWidths PS = sPS;
  X86SchedWriteWidths PD = sPD;
}
// Class that wraps a move/load/store triple for one vector width.
class X86SchedWriteMoveLS<SchedWrite MoveRR,
                          SchedWrite LoadRM,
                          SchedWrite StoreMR> {
  SchedWrite RR = MoveRR;
  SchedWrite RM = LoadRM;
  SchedWrite MR = StoreMR;
}
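// For example, WriteFMoveLS (defined below) groups WriteFMove, WriteFLoad and
// WriteFStore, so users can refer to WriteFMoveLS.RR, .RM and .MR.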
// Class that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
                                X86SchedWriteMoveLS s128,
                                X86SchedWriteMoveLS s256,
                                X86SchedWriteMoveLS s512> {
  X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
  X86SchedWriteMoveLS MMX = sScl; // MMX operations.
  X86SchedWriteMoveLS XMM = s128; // XMM operations.
  X86SchedWriteMoveLS YMM = s256; // YMM operations.
  X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
}
// Loads, stores, and moves, not folded with other operations.
def WriteLoad : SchedWrite;
def WriteStore : SchedWrite;
def WriteStoreNT : SchedWrite;
def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>;
def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>;
defm WriteIMul : X86SchedWritePair; // Integer multiplication.
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
def WriteIMulH : SchedWrite; // Integer multiplication, high part.
def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
// Integer division.
defm WriteDiv8 : X86SchedWritePair;
defm WriteDiv16 : X86SchedWritePair;
defm WriteDiv32 : X86SchedWritePair;
defm WriteDiv64 : X86SchedWritePair;
defm WriteIDiv8 : X86SchedWritePair;
defm WriteIDiv16 : X86SchedWritePair;
defm WriteIDiv32 : X86SchedWritePair;
defm WriteIDiv64 : X86SchedWritePair;
defm WriteBSF : X86SchedWritePair; // Bit scan forward.
defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
defm WritePOPCNT : X86SchedWritePair; // Bit population count.
defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
defm WriteCMOV : X86SchedWritePair; // Conditional move.
defm WriteCMOV2 : X86SchedWritePair; // Conditional (CF + ZF flag) move.
def WriteFCMOV : SchedWrite; // X87 conditional move.
def WriteSETCC : SchedWrite; // Set register based on condition code.
def WriteSETCCStore : SchedWrite;
def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support
// Integer shifts and rotates.
defm WriteShift : X86SchedWritePair;
// Double shift instructions.
def WriteSHDrri : SchedWrite;
def WriteSHDrrcl : SchedWrite;
def WriteSHDmri : SchedWrite;
def WriteSHDmrcl : SchedWrite;
// BMI1 BEXTR, BMI2 BZHI
defm WriteBEXTR : X86SchedWritePair;
defm WriteBZHI : X86SchedWritePair;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
def WriteZero : SchedWrite;
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
defm WriteJump : X86SchedWritePair;
// Floating point. This covers both scalar and vector operations.
def WriteFLD0 : SchedWrite;
def WriteFLD1 : SchedWrite;
def WriteFLDC : SchedWrite;
def WriteFLoad : SchedWrite;
def WriteFLoadX : SchedWrite;
def WriteFLoadY : SchedWrite;
def WriteFMaskedLoad : SchedWrite;
def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
defm WriteFAdd : X86SchedWritePair; // Floating point add/sub.
defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM).
defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM).
defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM).
defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub.
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM).
defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM).
defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM).
defm WriteFCmp : X86SchedWritePair; // Floating point compare.
defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM).
defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM).
defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM).
defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare.
defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM).
defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM).
defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM).
defm WriteFCom : X86SchedWritePair; // Floating point compare to flags.
defm WriteFMul : X86SchedWritePair; // Floating point multiplication.
defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM).
defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM).
defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM).
defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication.
defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM).
defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM).
defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division.
defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM).
defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM).
defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM).
defm WriteFDiv64 : X86SchedWritePair; // Floating point double division.
defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM).
defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM).
defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM).
defm WriteFSqrt : X86SchedWritePair; // Floating point square root.
defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM).
defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM).
defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM).
defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root.
defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM).
defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM).
defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM).
defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root.
defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate.
defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM).
defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM).
defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM).
defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM).
defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM).
defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM).
defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM).
defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM).
defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM).
defm WriteDPPD : X86SchedWritePair; // Floating point double dot product.
defm WriteDPPS : X86SchedWritePair; // Floating point single dot product.
defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM).
defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM).
defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
defm WriteFRnd : X86SchedWritePair; // Floating point rounding.
defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM).
defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM).
defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM).
defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM).
defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions.
defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM).
defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM).
defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles.
defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM).
defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM).
defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles.
defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM).
defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM).
defm WriteFBlend : X86SchedWritePair; // Floating point vector blends.
defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM).
defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM).
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends.
defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM).
defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM).
// FMA Scheduling helper class.
class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
// Horizontal Add/Sub (float and integer)
defm WriteFHAdd : X86SchedWritePair;
defm WriteFHAddY : X86SchedWritePair;
defm WriteFHAddZ : X86SchedWritePair;
defm WritePHAdd : X86SchedWritePair;
defm WritePHAddX : X86SchedWritePair;
defm WritePHAddY : X86SchedWritePair;
defm WritePHAddZ : X86SchedWritePair;
// Vector integer operations.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
def WriteVecLoadNT : SchedWrite;
def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
def WriteVecMoveToGpr : SchedWrite;
def WriteVecMoveFromGpr : SchedWrite;
defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals.
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM).
defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM).
defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM).
defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM).
defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM).
defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM).
defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions.
defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM).
defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM).
defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default).
defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM).
defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM).
defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM).
defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default).
defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM).
defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM).
defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM).
defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default).
defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM).
defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM).
defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM).
defm WritePMULLD : X86SchedWritePair; // Vector PMULLD.
defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM).
defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM).
defm WriteShuffle : X86SchedWritePair; // Vector shuffles.
defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM).
defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM).
defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM).
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles.
defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM).
defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM).
defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM).
defm WriteBlend : X86SchedWritePair; // Vector blends.
defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM).
defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM).
defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM).
defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM).
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM).
defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM).
defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM).
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM).
defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM).
defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS.
// Vector insert/extract operations.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;
def WriteVecMOVMSKY : SchedWrite;
def WriteMMXMOVMSK : SchedWrite;
// Conversion between integer and float.
defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer.
defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM).
defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM).
defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM).
defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM).
defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM).
defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM).
defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double.
defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM).
defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM).
defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM).
defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float.
defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM).
defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM).
defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM).
defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion.
defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM).
defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM).
defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM).
defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion.
defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM).
defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM).
defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM).
defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion.
defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM).
defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM).
def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
// CRC32 instruction.
defm WriteCRC32 : X86SchedWritePair;
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Mask
defm WritePCmpEStrM : X86SchedWritePair;
// Packed Compare Implicit Length Strings, Return Index
defm WritePCmpIStrI : X86SchedWritePair;
// Packed Compare Explicit Length Strings, Return Index
defm WritePCmpEStrI : X86SchedWritePair;
// AES instructions.
defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption.
defm WriteAESIMC : X86SchedWritePair; // InvMixColumn.
defm WriteAESKeyGen : X86SchedWritePair; // Key Generation.
// Carry-less multiplication instructions.
defm WriteCLMul : X86SchedWritePair;
// EMMS/FEMMS
def WriteEMMS : SchedWrite;
// Load/store MXCSR
def WriteLDMXCSR : SchedWrite;
def WriteSTMXCSR : SchedWrite;
// Catch-all for expensive system instructions.
def WriteSystem : SchedWrite;
// AVX2.
defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles.
defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles.
defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles.
defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles.
defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts.
defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM).
defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM).
// Old microcoded instructions that nobody uses.
def WriteMicrocoded : SchedWrite;
// Fence instructions.
def WriteFence : SchedWrite;
// Nop, not very useful except that it provides a model for nops!
def WriteNop : SchedWrite;
// Move/Load/Store wrappers.
def WriteFMoveLS
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
def WriteFMoveLSX
: X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
def WriteFMoveLSY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
def WriteFMoveLSNT
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
def WriteFMoveLSNTX
: X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
def WriteFMoveLSNTY
: X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
def SchedWriteFMoveLSNT
: X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
WriteFMoveLSNTY, WriteFMoveLSNTY>;
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
def WriteVecMoveLSY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
def WriteVecMoveLSNT
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTX
: X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
def WriteVecMoveLSNTY
: X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
def SchedWriteFAdd64
: X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
def SchedWriteFHAdd
: X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>;
def SchedWriteFCmp
: X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
def SchedWriteFCmp64
: X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
def SchedWriteFMul
: X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
def SchedWriteFMul64
: X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
def SchedWriteFMA
: X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
def SchedWriteDPPD
: X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>;
def SchedWriteDPPS
: X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>;
def SchedWriteFDiv
: X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
def SchedWriteFDiv64
: X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
def SchedWriteFSqrt
: X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
WriteFSqrtY, WriteFSqrtZ>;
def SchedWriteFSqrt64
: X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
WriteFSqrt64Y, WriteFSqrt64Z>;
def SchedWriteFRcp
: X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
def SchedWriteFRsqrt
: X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
def SchedWriteFRnd
: X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>;
def SchedWriteFLogic
: X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>;
def SchedWriteFTest
: X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>;
def SchedWriteFShuffle
: X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
WriteFShuffleY, WriteFShuffleZ>;
def SchedWriteFVarShuffle
: X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
WriteFVarShuffleY, WriteFVarShuffleZ>;
def SchedWriteFBlend
: X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>;
def SchedWriteFVarBlend
: X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
WriteFVarBlendY, WriteFVarBlendZ>;
def SchedWriteCvtDQ2PD
: X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
WriteCvtI2PDY, WriteCvtI2PDZ>;
def SchedWriteCvtDQ2PS
: X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
WriteCvtI2PSY, WriteCvtI2PSZ>;
def SchedWriteCvtPD2DQ
: X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
WriteCvtPD2IY, WriteCvtPD2IZ>;
def SchedWriteCvtPS2DQ
: X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
WriteCvtPS2IY, WriteCvtPS2IZ>;
def SchedWriteCvtPS2PD
: X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
WriteCvtPS2PDY, WriteCvtPS2PDZ>;
def SchedWriteCvtPD2PS
: X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
WriteCvtPD2PSY, WriteCvtPD2PSZ>;
def SchedWriteVecALU
: X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
def SchedWritePHAdd
: X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
def SchedWriteVecLogic
: X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
WriteVecLogicY, WriteVecLogicZ>;
def SchedWriteVecTest
: X86SchedWriteWidths<WriteVecTest, WriteVecTest,
WriteVecTestY, WriteVecTestZ>;
def SchedWriteVecShift
: X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
WriteVecShiftY, WriteVecShiftZ>;
def SchedWriteVecShiftImm
: X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
WriteVecShiftImmY, WriteVecShiftImmZ>;
def SchedWriteVarVecShift
: X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
WriteVarVecShiftY, WriteVarVecShiftZ>;
def SchedWriteVecIMul
: X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
WriteVecIMulY, WriteVecIMulZ>;
def SchedWritePMULLD
: X86SchedWriteWidths<WritePMULLD, WritePMULLD,
WritePMULLDY, WritePMULLDZ>;
def SchedWriteMPSAD
: X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
WriteMPSADY, WriteMPSADZ>;
def SchedWritePSADBW
: X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
WritePSADBWY, WritePSADBWZ>;
def SchedWriteShuffle
: X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
WriteShuffleY, WriteShuffleZ>;
def SchedWriteVarShuffle
: X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
WriteVarShuffleY, WriteVarShuffleZ>;
def SchedWriteBlend
: X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>;
def SchedWriteVarBlend
: X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
WriteVarBlendY, WriteVarBlendZ>;
// Vector size wrappers.
def SchedWriteFAddSizes
: X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
: X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
: X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
: X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
: X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
: X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
: X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
// IssueWidth is analogous to the number of decode units. Core and its
// descendants, including Nehalem and SandyBridge, have 4 decoders.
// Resources beyond the decoder operate on micro-ops and are buffered
// so adjacent micro-ops don't directly compete.
//
// MicroOpBufferSize > 1 indicates that RAW dependencies can be
// decoded in the same cycle. The value 32 is a reasonable, if arbitrary,
// number of in-flight instructions.
//
// HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
// indicates high latency opcodes. Alternatively, InstrItinData
// entries may be included here to define specific operand
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
// The GenericX86Model contains no instruction schedules
// and disables PostRAScheduler.
class GenericX86Model : SchedMachineModel {
let IssueWidth = 4;
let MicroOpBufferSize = 32;
let LoadLatency = 4;
let HighLatency = 10;
let PostRAScheduler = 0;
let CompleteModel = 0;
}
def GenericModel : GenericX86Model;
// Define a model with the PostRAScheduler enabled.
def GenericPostRAModel : GenericX86Model {
let PostRAScheduler = 1;
}


@@ -0,0 +1,917 @@
//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the schedule class data for the Intel Atom
// in-order (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//
//
// Scheduling information derived from the "Intel 64 and IA-32 Architectures
// Optimization Reference Manual", Chapter 13, Section 4.
// Atom machine model.
def AtomModel : SchedMachineModel {
  let IssueWidth = 2;        // Allows 2 instructions per scheduling group.
  let MicroOpBufferSize = 0; // In-order execution, always hide latency.
  let LoadLatency = 3;       // Expected cycles, may be overridden.
  let HighLatency = 30;      // Expected, may be overridden.
  // On the Atom, the throughput for taken branches is 2 cycles. For small
  // simple loops, expand by a small factor to hide the backedge cost.
  let LoopMicroOpBufferSize = 10;
  let PostRAScheduler = 1;
  let CompleteModel = 0;
}
let SchedModel = AtomModel in {
// Functional Units
def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store
                                 // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA
                                 // SIMD/FP: SIMD ALU, FP Adder
def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
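// Illustrative consequence: in "addl (%rdi), %eax", the %eax operand is not
// needed until 3 cycles after the load micro-op starts, so its producer may
// still be in flight when the add is issued.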
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> RRPorts,
                            list<ProcResourceKind> RMPorts,
                            int RRLat = 1, int RMLat = 1,
                            list<int> RRRes = [1],
                            list<int> RMRes = [1]> {
  // Register variant takes RRLat cycles on the given ports.
  def : WriteRes<SchedRW, RRPorts> {
    let Latency = RRLat;
    let ResourceCycles = RRRes;
  }
  // Memory variant takes RMLat cycles; on Atom, a folded load typically adds
  // 3 cycles to the latency.
  def : WriteRes<SchedRW.Folded, RMPorts> {
    let Latency = RMLat;
    let ResourceCycles = RMRes;
  }
}
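// Example use (illustrative; see the arithmetic section below):
//   defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
// defines both variants of WriteFAdd with a latency of 5 cycles, each
// occupying AtomPort0 for 5 cycles.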
// A folded store needs a cycle on Port0 for the store data.
def : WriteRes<WriteRMW, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
defm : X86WriteResPairUnsupported<WriteCRC32>;
defm : AtomWriteResPair<WriteCMOV, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteCMOV2, [AtomPort01], [AtomPort0]>;
defm : X86WriteRes<WriteFCMOV, [AtomPort01], 9, [9], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [AtomPort01]>;
def : WriteRes<WriteSETCCStore, [AtomPort01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteLAHFSAHF, [AtomPort01]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : WriteRes<WriteBitTest, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteIMulH>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [AtomPort1]>;
def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>;
def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>;
def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32,
IMUL64rmi8, IMUL64rmi32)>;
// Bit counts.
defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
defm : X86WriteRes<WriteSHDrri, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDrrcl,[AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteSHDmri, [AtomPort01], 4, [4], 1>;
defm : X86WriteRes<WriteSHDmrcl,[AtomPort01], 4, [4], 1>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [AtomPort0]>;
def : WriteRes<WriteStore, [AtomPort0]>;
def : WriteRes<WriteStoreNT, [AtomPort0]>;
def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteJump, [AtomPort1], [AtomPort1]>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [AtomPort01]> { let Latency = 100; }
def : WriteRes<WriteFence, [AtomPort0]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [AtomPort01]>;
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [AtomPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [AtomPort01], 6, [6], 1>;
def : WriteRes<WriteFLoad, [AtomPort0]>;
def : WriteRes<WriteFLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFLoadY>;
defm : X86WriteResUnsupported<WriteFMaskedLoad>;
defm : X86WriteResUnsupported<WriteFMaskedLoadY>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
defm : X86WriteResUnsupported<WriteFMaskedStore>;
defm : X86WriteResUnsupported<WriteFMaskedStoreY>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteFMoveY>;
defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>;
defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFLogicY>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFTestY>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>;
defm : X86WriteResPairUnsupported<WriteFShuffleY>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteDPPD>;
defm : X86WriteResPairUnsupported<WriteDPPS>;
defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : X86WriteResPairUnsupported<WriteFBlend>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : X86WriteResPairUnsupported<WriteFVarBlend>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDY>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadY>;
def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecLoadNTY>;
defm : X86WriteResUnsupported<WriteVecMaskedLoad>;
defm : X86WriteResUnsupported<WriteVecMaskedLoadY>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;
defm : X86WriteResUnsupported<WriteVecMoveY>;
defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : X86WriteResPairUnsupported<WritePMULLD>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : X86WriteResPairUnsupported<WritePHMINPOS>;
defm : X86WriteResPairUnsupported<WriteMPSAD>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>;
defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : X86WriteResPairUnsupported<WriteVarBlend>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
def : WriteRes<WriteVecExtract, [AtomPort0]>;
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WritePCmpIStrI>;
defm : X86WriteResPairUnsupported<WritePCmpIStrM>;
defm : X86WriteResPairUnsupported<WritePCmpEStrI>;
defm : X86WriteResPairUnsupported<WritePCmpEStrM>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
def : WriteRes<WriteVecMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [AtomPort0]> { let Latency = 3; let ResourceCycles = [3]; }
////////////////////////////////////////////////////////////////////////////////
// AES instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteAESIMC>;
defm : X86WriteResPairUnsupported<WriteAESKeyGen>;
defm : X86WriteResPairUnsupported<WriteAESDecEnc>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteResPairUnsupported<WriteCLMul>;
////////////////////////////////////////////////////////////////////////////////
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }
////////////////////////////////////////////////////////////////////////////////
// Special Cases.
////////////////////////////////////////////////////////////////////////////////
// Port0
def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr,
MOVSX64rr32)>;
def : SchedAlias<WriteALURMW, AtomWrite0_1>;
def : SchedAlias<WriteADCRMW, AtomWrite0_1>;
def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m",
"MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>;
def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>;
// Port1
def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
"BT(C|R|S)?(16|32|64)(rr|ri8)")>;
def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm,
MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>;
// Port0 and Port1
def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 1;
let ResourceCycles = [1, 1];
}
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
POP16rmr, POP32rmr, POP64rmr,
PUSH16r, PUSH32r, PUSH64r,
PUSHi16, PUSHi32,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
"IRET(16|32|64)?")>;
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
let Latency = 5;
let ResourceCycles = [5, 5];
}
def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>;
def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>;
// Port0 or Port1
def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
let Latency = 1;
let ResourceCycles = [1];
}
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
MOVSSrr, MOVSSrr_REV,
PSLLDQri, PSRLDQri)>;
def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
"MMX_PUNPCKH(BW|DQ|WD)irr")>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
let ResourceCycles = [2];
}
def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r,
PUSH16rmm, PUSH32rmm, PUSH64rmm,
LODSB, LODSL, LODSQ, LODSW,
SCASB, SCASL, SCASQ, SCASW)>;
def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8",
"PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)",
"XADD(8|16|32|64)rr",
"XCHG(8|16|32|64)(ar|rr)",
"(ST|ISTT)_F(P)?(16|32|64)?(m|rr)",
"MMX_P(ADD|SUB)Qirr",
"MOV(S|Z)X16rr8",
"MOV(UPS|UPD|DQU)mr",
"MASKMOVDQU(64)?",
"P(ADD|SUB)Qrr")>;
def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> {
let Latency = 3;
let ResourceCycles = [3];
}
def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm,
CMPSB, CMPSL, CMPSQ, CMPSW,
MOVSB, MOVSL, MOVSQ, MOVSW,
POP16rmm, POP32rmm, POP64rmm)>;
def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm",
"XCHG(8|16|32|64)rm",
"PH(ADD|SUB)Drr",
"MOV(S|Z)X16rm8",
"MMX_P(ADD|SUB)Qirm",
"MOV(UPS|UPD|DQU)rm",
"P(ADD|SUB)Qrm")>;
def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> {
let Latency = 4;
let ResourceCycles = [4];
}
def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO,
JCXZ, JECXZ, JRCXZ,
LD_F80m)>;
def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm",
"(MMX_)?PEXTRWrr(_REV)?")>;
def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> {
let Latency = 5;
let ResourceCycles = [5];
}
def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, ST_FP80m)>;
def : InstRW<[AtomWrite01_5], (instregex "MMX_PH(ADD|SUB)S?Wrr")>;
def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> {
let Latency = 6;
let ResourceCycles = [6];
}
def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT,
SHLD16rrCL, SHRD16rrCL,
SHLD16rri8, SHRD16rri8,
SHLD16mrCL, SHRD16mrCL,
SHLD16mri8, SHRD16mri8)>;
def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr",
"IST_F(P)?(16|32|64)?m",
"MMX_PH(ADD|SUB)S?Wrm")>;
def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> {
let Latency = 7;
let ResourceCycles = [7];
}
def : InstRW<[AtomWrite01_7], (instrs AAD8i8)>;
def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> {
let Latency = 8;
let ResourceCycles = [8];
}
def : InstRW<[AtomWrite01_8], (instrs LOOPE,
PUSHA16, PUSHA32,
SHLD64rrCL, SHRD64rrCL,
FNSTCW16m)>;
def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> {
let Latency = 9;
let ResourceCycles = [9];
}
def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr,
POPA16, POPA32,
PUSHF16, PUSHF32, PUSHF64,
SHLD64mrCL, SHRD64mrCL,
SHLD64mri8, SHRD64mri8,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
"(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
"CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
let ResourceCycles = [11];
}
def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>;
def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>;
def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>;
def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> {
let Latency = 14;
let ResourceCycles = [14];
}
def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>;
def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> {
let Latency = 15;
let ResourceCycles = [15];
}
def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr)>;
def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>;
def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> {
let Latency = 18;
let ResourceCycles = [18];
}
def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>;
def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> {
let Latency = 20;
let ResourceCycles = [20];
}
def : InstRW<[AtomWrite01_20], (instrs DAS)>;
def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>;
def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> {
let Latency = 22;
let ResourceCycles = [22];
}
def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>;
def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> {
let Latency = 23;
let ResourceCycles = [23];
}
def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>;
def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> {
let Latency = 25;
let ResourceCycles = [25];
}
def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>;
def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> {
let Latency = 26;
let ResourceCycles = [26];
}
def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>;
def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> {
let Latency = 29;
let ResourceCycles = [29];
}
def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>;
def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> {
let Latency = 30;
let ResourceCycles = [30];
}
def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>;
def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> {
let Latency = 32;
let ResourceCycles = [32];
}
def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>;
def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> {
let Latency = 45;
let ResourceCycles = [45];
}
def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>;
def AtomWrite01_46 : SchedWriteRes<[AtomPort01]> {
let Latency = 46;
let ResourceCycles = [46];
}
def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>;
def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> {
let Latency = 48;
let ResourceCycles = [48];
}
def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>;
def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> {
let Latency = 55;
let ResourceCycles = [55];
}
def : InstRW<[AtomWrite01_55], (instrs FPREM)>;
def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> {
let Latency = 59;
let ResourceCycles = [59];
}
def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>;
def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> {
let Latency = 63;
let ResourceCycles = [63];
}
def : InstRW<[AtomWrite01_63], (instrs FNINIT)>;
def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> {
let Latency = 68;
let ResourceCycles = [68];
}
def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>;
def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> {
let Latency = 71;
let ResourceCycles = [71];
}
def : InstRW<[AtomWrite01_71], (instrs FPREM1,
INVLPG, INVLPGA32, INVLPGA64)>;
def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> {
let Latency = 72;
let ResourceCycles = [72];
}
def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>;
def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> {
let Latency = 74;
let ResourceCycles = [74];
}
def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>;
def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> {
let Latency = 77;
let ResourceCycles = [77];
}
def : InstRW<[AtomWrite01_77], (instrs FSCALE)>;
def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> {
let Latency = 78;
let ResourceCycles = [78];
}
def : InstRW<[AtomWrite01_78], (instrs RDMSR)>;
def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
let ResourceCycles = [79];
}
def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
"LRETI?(L|Q|W)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
let ResourceCycles = [92];
}
def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>;
def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> {
let Latency = 94;
let ResourceCycles = [94];
}
def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>;
def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> {
let Latency = 99;
let ResourceCycles = [99];
}
def : InstRW<[AtomWrite01_99], (instrs F2XM1)>;
def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> {
let Latency = 121;
let ResourceCycles = [121];
}
def : InstRW<[AtomWrite01_121], (instrs CPUID)>;
def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> {
let Latency = 127;
let ResourceCycles = [127];
}
def : InstRW<[AtomWrite01_127], (instrs INT)>;
def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> {
let Latency = 130;
let ResourceCycles = [130];
}
def : InstRW<[AtomWrite01_130], (instrs INT3)>;
def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> {
let Latency = 140;
let ResourceCycles = [140];
}
def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>;
def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> {
let Latency = 141;
let ResourceCycles = [141];
}
def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>;
def AtomWrite01_146 : SchedWriteRes<[AtomPort01]> {
let Latency = 146;
let ResourceCycles = [146];
}
def : InstRW<[AtomWrite01_146], (instrs FYL2X)>;
def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> {
let Latency = 147;
let ResourceCycles = [147];
}
def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>;
def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> {
let Latency = 168;
let ResourceCycles = [168];
}
def : InstRW<[AtomWrite01_168], (instrs FPTAN)>;
def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> {
let Latency = 174;
let ResourceCycles = [174];
}
def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>;
def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>;
def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> {
let Latency = 183;
let ResourceCycles = [183];
}
def : InstRW<[AtomWrite01_183], (instrs FPATAN)>;
def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> {
let Latency = 202;
let ResourceCycles = [202];
}
def : InstRW<[AtomWrite01_202], (instrs WRMSR)>;
} // SchedModel


@@ -0,0 +1,682 @@
//=- X86ScheduleBtVer2.td - X86 BtVer2 (Jaguar) Scheduling ---*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for AMD btver2 (Jaguar) to support
// instruction scheduling and other instruction cost heuristics. Based on the
// AMD Software Optimization Guide for AMD Family 16h Processors and its
// instruction latency appendix.
//
//===----------------------------------------------------------------------===//
def BtVer2Model : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and btver2 can
// decode 2 instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 64; // Retire Control Unit
let LoadLatency = 5; // FPU load latency (worst case, cf. the 3-cycle integer load latency)
let HighLatency = 25;
let MispredictPenalty = 14; // Minimum branch misprediction penalty
let PostRAScheduler = 1;
// FIXME: SSE4/AVX is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = BtVer2Model in {
// Jaguar can issue up to 6 micro-ops in one cycle
def JALU0 : ProcResource<1>; // Integer Pipe0: integer ALU0 (also handle FP->INT jam)
def JALU1 : ProcResource<1>; // Integer Pipe1: integer ALU1/MUL/DIV
def JLAGU : ProcResource<1>; // Integer Pipe2: LAGU
def JSAGU : ProcResource<1>; // Integer Pipe3: SAGU (also handles 3-operand LEA)
def JFPU0 : ProcResource<1>; // Vector/FPU Pipe0: VALU0/VIMUL/FPA
def JFPU1 : ProcResource<1>; // Vector/FPU Pipe1: VALU1/STC/FPM
// The Integer PRF for Jaguar is 64 entries, and it holds the architectural and
// speculative versions of the 64-bit integer registers.
// Reference: www.realworldtech.com/jaguar/4/
//
// The processor always keeps the different parts of an integer register
// together. An instruction that writes to a part of a register will therefore
// have a false dependence on any previous write to the same register or any
// part of it.
// Reference: Section 21.10 "AMD Bobcat and Jaguar pipeline: Partial register
// access" - Agner Fog's "microarchitecture.pdf".
def JIntegerPRF : RegisterFile<64, [GR64, CCR]>;
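// Illustrative consequence of the above (an example, not from the guide): a
// write to %al cannot complete independently of an earlier in-flight write to
// %eax/%rax, because the sub-registers are not renamed separately.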
// The Jaguar FP Retire Queue renames SIMD and FP uOps onto a pool of 72 SSE
// registers. Operations on 256-bit data types are cracked into two COPs.
// Reference: www.realworldtech.com/jaguar/4/
def JFpuPRF : RegisterFile<72, [VR64, VR128, VR256], [1, 1, 2]>;
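// Because 256-bit operations are cracked into two COPs, the YMM writes below
// (e.g. WriteFAddY) generally use two micro-ops and double the resource
// cycles of their 128-bit counterparts.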
// The retire control unit (RCU) can track up to 64 macro-ops in-flight. It can
// retire up to two macro-ops per cycle.
// Reference: "Software Optimization Guide for AMD Family 16h Processors"
def JRCU : RetireControlUnit<64, 2>;
// Integer Pipe Scheduler
def JALU01 : ProcResGroup<[JALU0, JALU1]> {
let BufferSize=20;
}
// AGU Pipe Scheduler
def JLSAGU : ProcResGroup<[JLAGU, JSAGU]> {
let BufferSize=12;
}
// Fpu Pipe Scheduler
def JFPU01 : ProcResGroup<[JFPU0, JFPU1]> {
let BufferSize=18;
}
// Functional units
def JDiv : ProcResource<1>; // integer division
def JMul : ProcResource<1>; // integer multiplication
def JVALU0 : ProcResource<1>; // vector integer
def JVALU1 : ProcResource<1>; // vector integer
def JVIMUL : ProcResource<1>; // vector integer multiplication
def JSTC : ProcResource<1>; // vector store/convert
def JFPM : ProcResource<1>; // FP multiplication
def JFPA : ProcResource<1>; // FP addition
// Functional unit groups
def JFPX : ProcResGroup<[JFPA, JFPM]>;
def JVALU : ProcResGroup<[JVALU0, JVALU1]>;
// Integer loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
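// For example, in "addl (%rdi), %eax" the load begins immediately, so the
// %eax input only needs to be ready 3 cycles after the micro-op issues; an
// in-flight producer of %eax with latency <= 3 causes no extra stall.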
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when dispatched by the schedulers.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass JWriteResIntPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 3 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 3);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
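// As a sketch, "defm : JWriteResIntPair<WriteALU, [JALU01], 1>;" below
// expands to roughly:
//   def : WriteRes<WriteALU,   [JALU01]>        { let Latency = 1; }
//   def : WriteRes<WriteALULd, [JLAGU, JALU01]> { let Latency = 4; }
// i.e. the folded-load variant adds a JLAGU cycle and 3 cycles of latency.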
multiclass JWriteResFpuPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [], int UOps = 1> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
let NumMicroOps = UOps;
}
}
multiclass JWriteResYMMPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [2], int UOps = 2> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses 2 cycles on JLAGU and adds 5 cycles to the
// latency.
def : WriteRes<SchedRW.Folded, !listconcat([JLAGU], ExePorts)> {
let Latency = !add(Lat, 5);
let ResourceCycles = !listconcat([2], Res);
let NumMicroOps = UOps;
}
}
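// Sketch: "defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;"
// below yields a register variant with Latency = 3 and a memory variant with
// Latency = 8 and ResourceCycles = [2, 2, 2] (two JLAGU cycles for the
// 32-byte load, prepended to the YMM execution resources).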
// A folded store needs a cycle on the SAGU for the store data.
def : WriteRes<WriteRMW, [JSAGU]>;
////////////////////////////////////////////////////////////////////////////////
// Arithmetic.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication
defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteIDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteIDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
defm : JWriteResIntPair<WriteIDiv32, [JALU1, JDiv], 25, [1, 25], 2>;
defm : JWriteResIntPair<WriteIDiv64, [JALU1, JDiv], 41, [1, 41], 2>;
defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move.
defm : JWriteResIntPair<WriteCMOV2, [JALU01], 1>; // Conditional (CF + ZF flag) move.
defm : X86WriteRes<WriteFCMOV, [JFPU0, JFPA], 3, [1,1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [JALU01]>; // Setcc.
def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>;
def : WriteRes<WriteLAHFSAHF, [JALU01]>;
def : WriteRes<WriteBitTest,[JALU01]>;
// This is for simple LEAs with one or two input operands.
def : WriteRes<WriteLEA, [JALU01]>;
// Bit counts.
defm : JWriteResIntPair<WriteBSF, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WriteBSR, [JALU01], 5, [4], 8>;
defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>;
defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>;
// BMI1 BEXTR, BMI2 BZHI
defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteShift, [JALU01], 1>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [JALU01], 3, [6], 6>;
defm : X86WriteRes<WriteSHDrrcl,[JALU01], 4, [8], 7>;
defm : X86WriteRes<WriteSHDmri, [JLAGU, JALU01], 9, [1, 22], 8>;
defm : X86WriteRes<WriteSHDmrcl,[JLAGU, JALU01], 9, [1, 22], 8>;
////////////////////////////////////////////////////////////////////////////////
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteStore, [JSAGU]>;
def : WriteRes<WriteStoreNT, [JSAGU]>;
def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copied from WriteLoad/WriteStore.
def : WriteRes<WriteLDMXCSR, [JLAGU]> { let Latency = 5; }
def : WriteRes<WriteSTMXCSR, [JSAGU]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
////////////////////////////////////////////////////////////////////////////////
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteZero, []>;
////////////////////////////////////////////////////////////////////////////////
// Branches don't produce values, so they have no latency, but they still
// consume resources. Indirect branches can fold loads.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResIntPair<WriteJump, [JALU01], 1>;
////////////////////////////////////////////////////////////////////////////////
// Special case scheduling classes.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteSystem, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [JALU01]> { let Latency = 100; }
def : WriteRes<WriteFence, [JSAGU]>;
// Nops don't have dependencies, so there's no actual latency, but we set this
// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle.
def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; }
////////////////////////////////////////////////////////////////////////////////
// Floating point. This covers both scalar and vector operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteFLD0, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLD1, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLDC, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadX, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFLoadY, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>;
defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>;
defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>;
defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>;
defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>;
defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 3>;
defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>;
defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>;
defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : JWriteResFpuPair<WriteFDiv64, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResFpuPair<WriteFDiv64X, [JFPU1, JFPM], 19, [1, 19]>;
defm : JWriteResYMMPair<WriteFDiv64Y, [JFPU1, JFPM], 38, [2, 38], 2>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResFpuPair<WriteFSqrtX, [JFPU1, JFPM], 21, [1, 21]>;
defm : JWriteResYMMPair<WriteFSqrtY, [JFPU1, JFPM], 42, [2, 42], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : JWriteResFpuPair<WriteFSqrt64, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResFpuPair<WriteFSqrt64X, [JFPU1, JFPM], 27, [1, 27]>;
defm : JWriteResYMMPair<WriteFSqrt64Y, [JFPU1, JFPM], 54, [2, 54], 2>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>;
defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>;
defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>;
defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteFTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>;
defm : JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Conversions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>;
defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
// FIXME: f+3 ST, LD+STC latency
defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>;
defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>;
defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>;
defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
////////////////////////////////////////////////////////////////////////////////
// Vector integer operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>;
defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>;
defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUY>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmY>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : X86WriteResPairUnsupported<WriteVarVecShift>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulY>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WritePMULLDY>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>;
defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>;
defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>;
defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>;
defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicY>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>;
defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
defm : X86WriteRes<WriteVecInsert, [JFPU01, JVALU], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecInsertLd, [JFPU01, JVALU, JLAGU], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0], 3, [1,1,1], 1>;
defm : X86WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU], 3, [1,1,1], 1>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WritePCmpIStrI, [JFPU1, JVALU1, JFPA, JALU0], 7, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpIStrM, [JFPU1, JVALU1, JFPA, JALU0], 8, [1, 2, 1, 1], 3>;
defm : JWriteResFpuPair<WritePCmpEStrI, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
defm : JWriteResFpuPair<WritePCmpEStrM, [JFPU1, JSAGU, JLAGU, JVALU, JVALU1, JFPA, JALU0], 14, [1, 2, 2, 6, 4, 1, 1], 9>;
////////////////////////////////////////////////////////////////////////////////
// MOVMSK Instructions.
////////////////////////////////////////////////////////////////////////////////
def : WriteRes<WriteFMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : X86WriteResUnsupported<WriteVecMOVMSKY>;
def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
////////////////////////////////////////////////////////////////////////////////
// AES Instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1, 1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteFHAdd, [JFPU0, JFPA], 3>;
defm : JWriteResYMMPair<WriteFHAddY, [JFPU0, JFPA], 3, [2,2], 2>;
defm : JWriteResFpuPair<WritePHAdd, [JFPU01, JVALU], 1>;
defm : JWriteResFpuPair<WritePHAddX, [JFPU01, JVALU], 1>;
defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
////////////////////////////////////////////////////////////////////////////////
defm : JWriteResFpuPair<WriteCLMul, [JFPU0, JVIMUL], 2>;
////////////////////////////////////////////////////////////////////////////////
// SSE4A instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteINSERTQ : SchedWriteRes<[JFPU01, JVALU]> {
let Latency = 2;
let ResourceCycles = [1, 4];
}
def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
////////////////////////////////////////////////////////////////////////////////
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
def JWriteVBROADCASTYLd : SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
VBROADCASTSSYrm)>;
def JWriteJVZEROALL : SchedWriteRes<[]> {
let Latency = 90;
let NumMicroOps = 73;
}
def : InstRW<[JWriteJVZEROALL], (instrs VZEROALL)>;
def JWriteJVZEROUPPER : SchedWriteRes<[]> {
let Latency = 46;
let NumMicroOps = 37;
}
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////
def JWriteZeroLatency : SchedWriteRes<[]> {
let Latency = 0;
}
// Certain instructions that use the same register for both source
// operands do not have a real dependency on the previous contents of the
// register, and thus, do not have to wait before completing. They can be
// optimized out at register renaming stage.
// Reference: Section 10.8 of the "Software Optimization Guide for AMD Family
// 15h Processors".
// Reference: Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
// Section 21.8 [Dependency-breaking instructions].
def JWriteZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteALU]>
]>;
def : InstRW<[JWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
XOR32rr, XOR64rr)>;
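// For example, "xorl %eax, %eax" (XOR32rr with identical source and
// destination) matches ZeroIdiomPredicate and completes with zero latency and
// no execution resources, while "xorl %ebx, %eax" falls back to WriteALU.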
def JWriteFZeroIdiom : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteFLogic]>
]>;
def : InstRW<[JWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr,
ANDNPSrr, VANDNPSrr,
ANDNPDrr, VANDNPDrr)>;
def JWriteVZeroIdiomLogic : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogic]>
]>;
def : InstRW<[JWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>;
def JWriteVZeroIdiomLogicX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecLogicX]>
]>;
def : InstRW<[JWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
PANDNrr, VPANDNrr)>;
def JWriteVZeroIdiomALU : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALU]>
]>;
def : InstRW<[JWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr,
MMX_PSUBQirr, MMX_PSUBWirr,
MMX_PCMPGTBirr, MMX_PCMPGTDirr,
MMX_PCMPGTWirr)>;
def JWriteVZeroIdiomALUX : SchedWriteVariant<[
SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [JWriteZeroLatency]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteVecALUX]>
]>;
def : InstRW<[JWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr,
PSUBDrr, VPSUBDrr,
PSUBQrr, VPSUBQrr,
PSUBWrr, VPSUBWrr,
PCMPGTBrr, VPCMPGTBrr,
PCMPGTDrr, VPCMPGTDrr,
PCMPGTQrr, VPCMPGTQrr,
PCMPGTWrr, VPCMPGTWrr)>;
// This write is used for slow LEA instructions.
def JWrite3OpsLEA : SchedWriteRes<[JALU1, JSAGU]> {
let Latency = 2;
}
// On Jaguar, a slow LEA is either a three-operand LEA (base, index, offset),
// or an LEA with a "Scale" value other than 1.
def JSlowLEAPredicate : MCSchedPredicate<
CheckAny<[
// A 3-operand LEA (base, index, offset).
IsThreeOperandsLEAFn,
// An LEA with a "Scale" other than 1.
CheckAll<[
CheckIsImmOperand<2>,
CheckNot<CheckImmOperand<2, 1>>
]>
]>
>;
def JWriteLEA : SchedWriteVariant<[
SchedVar<JSlowLEAPredicate, [JWrite3OpsLEA]>,
SchedVar<MCSchedPredicate<TruePred>, [WriteLEA]>
]>;
def : InstRW<[JWriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
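// Illustrative classification under JSlowLEAPredicate:
//   leal 4(%rax,%rbx), %ecx   ; base + index + offset -> JWrite3OpsLEA
//   leal (%rax,%rbx,2), %ecx  ; scale != 1            -> JWrite3OpsLEA
//   leal 8(%rax), %ecx        ; simple                -> WriteLEA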
def JSlowLEA16r : SchedWriteRes<[JALU01]> {
let Latency = 3;
let ResourceCycles = [4];
}
def : InstRW<[JSlowLEA16r], (instrs LEA16r)>;
} // SchedModel


@@ -0,0 +1,486 @@
//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Intel Silvermont to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
def SLMModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and SLM can decode 2
// instructions per cycle.
let IssueWidth = 2;
let MicroOpBufferSize = 32; // Based on the reorder buffer.
let LoadLatency = 3;
let MispredictPenalty = 10;
let PostRAScheduler = 1;
// For small loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
// FIXME: SSE4 is unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
}
let SchedModel = SLMModel in {
// Silvermont has 5 reservation stations for micro-ops
def SLM_IEC_RSV0 : ProcResource<1>;
def SLM_IEC_RSV1 : ProcResource<1>;
def SLM_FPC_RSV0 : ProcResource<1> { let BufferSize = 1; }
def SLM_FPC_RSV1 : ProcResource<1> { let BufferSize = 1; }
def SLM_MEC_RSV : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
def SLM_IEC_RSV01 : ProcResGroup<[SLM_IEC_RSV0, SLM_IEC_RSV1]>;
def SLM_FPC_RSV01 : ProcResGroup<[SLM_FPC_RSV0, SLM_FPC_RSV1]>;
def SLMDivider : ProcResource<1>;
def SLMFPMultiplier : ProcResource<1>;
def SLMFPDivider : ProcResource<1>;
// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
// cycles after the memory operand.
def : ReadAdvance<ReadAfterLd, 3>;
// Many SchedWrites are defined in pairs with and without a folded load.
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops when queued in the reservation station.
// This multiclass defines the resource usage for variants with and without
// folded loads.
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
int LoadLat = 3> {
// The register variant uses a single cycle on ExePorts.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
let ResourceCycles = Res;
let NumMicroOps = UOps;
}
// Memory variant also uses a cycle on MEC_RSV and adds LoadLat cycles to
// the latency (default = 3).
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
let NumMicroOps = UOps;
}
}
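// As a sketch, "defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;" below
// expands to roughly:
//   def : WriteRes<WriteALU,   [SLM_IEC_RSV01]>              { let Latency = 1; }
//   def : WriteRes<WriteALULd, [SLM_MEC_RSV, SLM_IEC_RSV01]> { let Latency = 4; }
// with the folded variant spending one extra cycle on SLM_MEC_RSV.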
// A folded store needs a cycle on MEC_RSV for the store data, but it does not
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copied from WriteStore/WriteLoad.
def : WriteRes<WriteSTMXCSR, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteLDMXCSR, [SLM_MEC_RSV]> { let Latency = 3; }
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
defm : X86WriteRes<WriteSHDrri, [SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[SLM_IEC_RSV0], 1, [1], 1>;
defm : X86WriteRes<WriteSHDmri, [SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : X86WriteRes<WriteSHDmrcl,[SLM_MEC_RSV, SLM_IEC_RSV0], 4, [2, 1], 2>;
defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>;
defm : SLMWriteResPair<WriteCMOV2, [SLM_IEC_RSV01], 2, [2]>;
defm : X86WriteRes<WriteFCMOV, [SLM_FPC_RSV1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>;
def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
// FIXME: Latency and NumMicroOps?
let ResourceCycles = [2,1];
}
def : WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01]>;
def : WriteRes<WriteBitTest,[SLM_IEC_RSV01]>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
// the port to read all inputs. We don't model that.
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
// Bit counts.
defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
// BMI1 BEXTR, BMI2 BZHI
defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [SLM_FPC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteFLDC, [SLM_FPC_RSV01], 1, [2], 2>;
def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>;
defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFMul, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFRndY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
// Conversion between integer and float.
defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>;
defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
// Vector integer operations.
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>;
def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>;
defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePMULLDZ>;
defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
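// In WriteRes records like the one above, ResourceCycles pairs positionally
// with the resource list: [1, 2] means one cycle on SLM_FPC_RSV0 and two
// cycles on SLM_MEC_RSV per extract-to-memory.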
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [SLM_FPC_RSV0]> {
let Latency = 13;
let ResourceCycles = [13];
}
def : WriteRes<WritePCmpIStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 13;
let ResourceCycles = [13, 1];
}
// Packed Compare Explicit Length Strings, Return Mask
def : WriteRes<WritePCmpEStrM, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpEStrMLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Implicit Length Strings, Return Index
def : WriteRes<WritePCmpIStrI, [SLM_FPC_RSV0]> {
let Latency = 17;
let ResourceCycles = [17];
}
def : WriteRes<WritePCmpIStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 17;
let ResourceCycles = [17, 1];
}
// Packed Compare Explicit Length Strings, Return Index
def : WriteRes<WritePCmpEStrI, [SLM_FPC_RSV0]> {
let Latency = 21;
let ResourceCycles = [21];
}
def : WriteRes<WritePCmpEStrILd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 21;
let ResourceCycles = [21, 1];
}
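// Each string-compare write type above follows the same shape: the register
// form ties up SLM_FPC_RSV0 for its full latency, while the folded-load
// (*Ld) form adds a single cycle on SLM_MEC_RSV on top of the same pipe
// usage.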
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteVecMOVMSKY, [SLM_FPC_RSV1]> { let Latency = 4; }
def : WriteRes<WriteMMXMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
// AES Instructions.
def : WriteRes<WriteAESDecEnc, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESDecEncLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESIMC, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESIMCLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
def : WriteRes<WriteAESKeyGen, [SLM_FPC_RSV0]> {
let Latency = 8;
let ResourceCycles = [5];
}
def : WriteRes<WriteAESKeyGenLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 8;
let ResourceCycles = [5, 1];
}
// Carry-less multiplication instructions.
def : WriteRes<WriteCLMul, [SLM_FPC_RSV0]> {
let Latency = 10;
let ResourceCycles = [10];
}
def : WriteRes<WriteCLMulLd, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 10;
let ResourceCycles = [10, 1];
}
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteFence, [SLM_MEC_RSV]>;
def : WriteRes<WriteNop, []>;
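// The 100-cycle latencies on WriteSystem and WriteMicrocoded read as
// conservative placeholders rather than measured values.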
// AVX/FMA is not supported on this architecture, but we define the basic
// scheduling resources anyway.
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
defm : X86WriteResPairUnsupported<WriteVarShuffle256>;
defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
defm : X86WriteResPairUnsupported<WriteFMA>;
defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PS>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>;
defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PH>;
defm : X86WriteResUnsupported<WriteCvtPS2PHY>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
} // SchedModel

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,459 @@
//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This is a target description file for the Intel i386 architecture, referred
// to here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
// Get the target-independent interfaces which we are implementing...
//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// X86 Subtarget state
//
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
def Mode32Bit : SubtargetFeature<"32bit-mode", "In32BitMode", "true",
"32-bit mode (80386)">;
def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
// X86 Subtarget features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87", "HasX87", "true",
"Enable X87 float instructions">;
def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true",
"Enable NOPL instruction">;
def FeatureCMOV : SubtargetFeature<"cmov", "HasCMov", "true",
"Enable conditional move instructions">;
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
"Support fxsave/fxrestore instructions">;
def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
"Support xsave instructions">;
def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
"Support xsaveopt instructions">;
def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
"Support xsavec instructions">;
def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
[FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
"Enable SSE3 instructions",
[FeatureSSE2]>;
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
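// Implied features are resolved transitively, so requesting one SSE level
// pulls in everything below it. For example (an illustrative llc invocation,
// not part of this file):
//   llc -mtriple=i686-unknown-linux-gnu -mattr=+sse4.2 foo.ll
// enables SSE4.1, SSSE3, SSE3, SSE2, SSE1, and CMOV as well.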
// The MMX subtarget feature is separate from the rest of the SSE features
// because it's important (for odd compatibility reasons) to be able to
// turn it off explicitly while allowing SSE+ to be on.
def FeatureMMX : SubtargetFeature<"mmx", "X863DNowLevel", "MMX",
"Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
// All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
"PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
"Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
"Enable AVX instructions",
[FeatureSSE42]>;
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
def FeatureFMA : SubtargetFeature<"fma", "HasFMA", "true",
"Enable three-operand fused multiple-add",
[FeatureAVX]>;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions",
[FeatureAVX]>;
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
"Enable AVX-512 Byte and Word Instructions",
[FeatureAVX512]>;
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
"Enable AVX-512 further Vector Byte Manipulation Instructions",
[FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
def FeatureGFNI : SubtargetFeature<"gfni", "HasGFNI", "true",
"Enable Galois Field Arithmetic Instructions",
[FeatureSSE2]>;
def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",
"Enable vpclmulqdq instructions",
[FeatureAVX, FeaturePCLMUL]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add",
[FeatureAVX, FeatureSSE4A]>;
def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
"Enable XOP instructions",
[FeatureFMA4]>;
def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
"HasSSEUnalignedMem", "true",
"Allow unaligned memory operands with SSE instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
def FeatureVAES : SubtargetFeature<"vaes", "HasVAES", "true",
"Promote selected AES instructions to AVX512/AVX registers",
[FeatureAVX, FeatureAES]>;
def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
"Enable TBM instructions">;
def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true",
"Enable LWP instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
"Enable Cache Line Zero">;
def FeatureCLDEMOTE : SubtargetFeature<"cldemote", "HasCLDEMOTE", "true",
"Enable Cache Demote">;
def FeaturePTWRITE : SubtargetFeature<"ptwrite", "HasPTWRITE", "true",
"Support ptwrite instruction">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
"Use 8-bit divide for positive values less than 256">;
def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
"HasSlowDivide64", "true",
"Use 32-bit divide for positive values less than 2^32">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
def FeatureWBNOINVD : SubtargetFeature<"wbnoinvd", "HasWBNOINVD", "true",
"Write Back No Invalidate">;
def FeatureRDPID : SubtargetFeature<"rdpid", "HasRDPID", "true",
"Support RDPID instructions">;
def FeatureWAITPKG : SubtargetFeature<"waitpkg", "HasWAITPKG", "true",
"Wait and pause enhancements">;
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
// On recent X86 (port-bound) processors, it's preferable to combine multiple
// fixed shuffles into a single shuffle using a variable mask.
def FeatureFastVariableShuffle
: SubtargetFeature<"fast-variable-shuffle",
"HasFastVariableShuffle",
"true", "Shuffles with variable masks are fast">;
// On some X86 processors, there is no performance hazard to writing only the
// lower parts of a YMM or ZMM register without clearing the upper part.
def FeatureFastPartialYMMorZMMWrite
: SubtargetFeature<"fast-partial-ymm-or-zmm-write",
"HasFastPartialYMMorZMMWrite",
"true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
def FeatureFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
def FeatureFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
def FeatureFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
// If the target can efficiently decode NOPs up to 11 bytes in length.
def FeatureFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
// If the target can efficiently decode NOPs up to 15 bytes in length.
def FeatureFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
def FeatureFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
// Development Manual. This feature essentially means that REP MOVSB will copy
// using the largest available size instead of copying bytes one by one, making
// it at least as fast as REP MOVS{W,D,Q}.
def FeatureERMSB
: SubtargetFeature<
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
// Gather has been available since Haswell (the AVX2 set), so technically we
// can generate gathers on all AVX2 processors, but the overhead on Haswell
// is high. Skylake Client has faster gathers than Haswell, with performance
// similar to Skylake Server (AVX-512).
def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast.">;
def FeaturePrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
// Enable mitigation of some aspects of speculative execution related
// vulnerabilities by removing speculatable indirect branches. This disables
// jump-table formation, rewrites explicit `indirectbr` instructions into
// `switch` instructions, and uses a special construct called a "retpoline" to
// prevent speculation of the remaining indirect branches (indirect calls and
// tail calls).
def FeatureRetpoline
: SubtargetFeature<"retpoline", "UseRetpoline", "true",
"Remove speculation of indirect branches from the "
"generated code, either by avoiding them entirely or "
"lowering them with a speculation blocking construct.">;
// Rely on external thunks for the emitted retpoline calls. This allows users
// to provide their own custom thunk definitions in highly specialized
// environments such as a kernel that does boot-time hot patching.
def FeatureRetpolineExternalThunk
: SubtargetFeature<
"retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
"Enable retpoline, but with an externally provided thunk.",
[FeatureRetpoline]>;
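// With retpoline-external-thunk, indirect branches are still rewritten, but
// calls target a user-supplied thunk symbol instead of a compiler-emitted
// one (on x86-64 the conventional name is __llvm_retpoline_r11; treat the
// exact symbol set as a property of the LLVM version in use).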
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction">;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "X86Schedule.td"
include "X86InstrInfo_reduce.td"
def X86InstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// Assembly Parser
//===----------------------------------------------------------------------===//
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Variant name.
string Name = "att";
// Discard comments in assembly strings.
string CommentDelimiter = "#";
// Recognize hard coded registers.
string RegisterPrefix = "%";
}
def IntelAsmParserVariant : AsmParserVariant {
int Variant = 1;
// Variant name.
string Name = "intel";
// Discard comments in assembly strings.
string CommentDelimiter = ";";
// Recognize hard coded registers.
string RegisterPrefix = "";
}
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
// The X86 target supports two different assembly syntaxes for printing
// instructions. This is controlled by the -x86-asm-syntax={att|intel} flag.
def ATTAsmWriter : AsmWriter {
string AsmWriterClassName = "ATTInstPrinter";
int Variant = 0;
}
def IntelAsmWriter : AsmWriter {
string AsmWriterClassName = "IntelInstPrinter";
int Variant = 1;
}
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
let AllowRegisterRenaming = 1;
}
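// How this description is typically consumed (illustrative commands; the
// include paths depend on the build tree):
//   llvm-tblgen -gen-subtarget  -I llvm/include X86.td -o X86GenSubtargetInfo.inc
//   llvm-tblgen -gen-instr-info -I llvm/include X86.td -o X86GenInstrInfo.inc
//   llvm-tblgen -gen-asm-writer -I llvm/include X86.td -o X86GenAsmWriter.inc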