diff --git a/PowerRecomp/recompiler.cpp b/PowerRecomp/recompiler.cpp index b8eab36..30a1aff 100644 --- a/PowerRecomp/recompiler.cpp +++ b/PowerRecomp/recompiler.cpp @@ -560,7 +560,7 @@ bool Recompiler::Recompile( case PPC_INST_FADDS: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64 + {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); + println("\t{}.f64 = double(float({}.f64 + {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); break; case PPC_INST_FCFID: @@ -575,17 +575,17 @@ bool Recompiler::Recompile( case PPC_INST_FCTID: printSetFlushMode(false); - println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvtsd_si64(_mm_load1_pd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); + println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvtsd_si64(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); break; case PPC_INST_FCTIDZ: printSetFlushMode(false); - println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvttsd_si64(_mm_load1_pd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); + println("\t{}.s64 = ({}.f64 > double(LLONG_MAX)) ? LLONG_MAX : _mm_cvttsd_si64(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); break; case PPC_INST_FCTIWZ: printSetFlushMode(false); - println("\t{}.s64 = ({}.f64 > double(INT_MAX)) ? INT_MAX : _mm_cvttsd_si32(_mm_load1_pd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); + println("\t{}.s64 = ({}.f64 > double(INT_MAX)) ? INT_MAX : _mm_cvttsd_si32(_mm_load_sd(&{}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[1])); break; case PPC_INST_FDIV: @@ -595,7 +595,7 @@ bool Recompiler::Recompile( case PPC_INST_FDIVS: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64 / {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); + println("\t{}.f64 = double(float({}.f64 / {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); break; case PPC_INST_FMADD: @@ -605,7 +605,7 @@ bool Recompiler::Recompile( case PPC_INST_FMADDS: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64 * {}.f64 + {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); + println("\t{}.f64 = double(float({}.f64 * {}.f64 + {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); break; case PPC_INST_FMR: @@ -620,7 +620,7 @@ bool Recompiler::Recompile( case PPC_INST_FMSUBS: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64 * {}.f64 - {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); + println("\t{}.f64 = double(float({}.f64 * {}.f64 - {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); break; case PPC_INST_FMUL: @@ -630,7 +630,7 @@ bool Recompiler::Recompile( case PPC_INST_FMULS: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64 * {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); + println("\t{}.f64 = double(float({}.f64 * {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); break; case PPC_INST_FNABS: @@ -645,7 +645,7 @@ bool Recompiler::Recompile( case PPC_INST_FNMADDS: printSetFlushMode(false); - println("\t{}.f64 = float(-({}.f64 * {}.f64 + {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); + println("\t{}.f64 = double(float(-({}.f64 * {}.f64 + {}.f64)));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); break; case PPC_INST_FNMSUB: @@ -655,7 +655,7 @@ bool Recompiler::Recompile( case PPC_INST_FNMSUBS: printSetFlushMode(false); - println("\t{}.f64 = float(-({}.f64 * {}.f64 - {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); + println("\t{}.f64 = double(float(-({}.f64 * {}.f64 - {}.f64)));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2]), f(insn.operands[3])); break; case PPC_INST_FRES: @@ -665,7 +665,7 @@ bool Recompiler::Recompile( case PPC_INST_FRSP: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64);", f(insn.operands[0]), f(insn.operands[1])); + println("\t{}.f64 = double(float({}.f64));", f(insn.operands[0]), f(insn.operands[1])); break; case PPC_INST_FSEL: @@ -680,7 +680,7 @@ bool Recompiler::Recompile( case PPC_INST_FSQRTS: printSetFlushMode(false); - println("\t{}.f64 = float(sqrt({}.f64));", f(insn.operands[0]), f(insn.operands[1])); + println("\t{}.f64 = double(float(sqrt({}.f64)));", f(insn.operands[0]), f(insn.operands[1])); break; case PPC_INST_FSUB: @@ -690,7 +690,7 @@ bool Recompiler::Recompile( case PPC_INST_FSUBS: printSetFlushMode(false); - println("\t{}.f64 = float({}.f64 - {}.f64);", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); + println("\t{}.f64 = double(float({}.f64 - {}.f64));", f(insn.operands[0]), f(insn.operands[1]), f(insn.operands[2])); break; case PPC_INST_LBZ: @@ -763,7 +763,7 @@ bool Recompiler::Recompile( if (insn.operands[2] != 0) print("{}.u32 + ", r(insn.operands[2])); println("{});", int32_t(insn.operands[1])); - println("\t{}.f64 = {}.f32;", f(insn.operands[0]), temp()); + println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp()); break; case PPC_INST_LFSX: @@ -772,7 +772,7 @@ bool Recompiler::Recompile( if (insn.operands[1] != 0) print("{}.u32 + ", r(insn.operands[1])); println("{}.u32);", r(insn.operands[2])); - println("\t{}.f64 = {}.f32;", f(insn.operands[0]), temp()); + println("\t{}.f64 = double({}.f32);", f(insn.operands[0]), temp()); break; case PPC_INST_LHA: @@ -1227,7 +1227,7 @@ bool Recompiler::Recompile( case PPC_INST_STFS: printSetFlushMode(false); - println("\t{}.f32 = {}.f64;", temp(), f(insn.operands[0])); + println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0])); print("\tPPC_STORE_U32("); if (insn.operands[2] != 0) print("{}.u32 + ", r(insn.operands[2])); @@ -1236,7 +1236,7 @@ bool Recompiler::Recompile( case PPC_INST_STFSX: printSetFlushMode(false); - println("\t{}.f32 = {}.f64;", temp(), f(insn.operands[0])); + println("\t{}.f32 = float({}.f64);", temp(), f(insn.operands[0])); print("\tPPC_STORE_U32("); if (insn.operands[1] != 0) print("{}.u32 + ", r(insn.operands[1])); diff --git a/PowerUtils/ppc_context.h b/PowerUtils/ppc_context.h index ef02bb8..5e68e0d 100644 --- a/PowerUtils/ppc_context.h +++ b/PowerUtils/ppc_context.h @@ -27,8 +27,7 @@ #define PPC_FUNC(x) extern "C" PPC_NOINLINE void x(PPCContext& __restrict__ ctx, uint8_t* base) -#define PPC_FUNC_PROLOGUE() \ - __builtin_assume(((size_t)base & 0xFFFFFFFF) == 0); \ +#define PPC_FUNC_PROLOGUE() __builtin_assume(((size_t)base & 0xFFFFFFFF) == 0) #define PPC_LOAD_U8(x) *(uint8_t*)(base + (x)) #define PPC_LOAD_U16(x) __builtin_bswap16(*(uint16_t*)(base + (x))) @@ -141,20 +140,29 @@ struct alignas(0x10) PPCVRegister }; }; +#define PPC_ROUND_NEAREST 0x00 +#define PPC_ROUND_TOWARD_ZERO 0x01 +#define PPC_ROUND_UP 0x02 +#define PPC_ROUND_DOWN 0x03 +#define PPC_ROUND_MASK 0x03 + struct PPCFPSCRRegister { uint32_t csr; + static constexpr size_t GuestToHost[] = { _MM_ROUND_NEAREST, _MM_ROUND_TOWARD_ZERO, _MM_ROUND_UP, _MM_ROUND_DOWN }; + static constexpr size_t HostToGuest[] = { PPC_ROUND_NEAREST, PPC_ROUND_DOWN, PPC_ROUND_UP, PPC_ROUND_TOWARD_ZERO }; + inline uint32_t loadFromHost() noexcept { csr = _mm_getcsr(); - return (0x6C >> ((csr & _MM_ROUND_MASK) >> 12)) & 3; + return HostToGuest[(csr & _MM_ROUND_MASK) >> 13]; } inline void storeFromGuest(uint32_t value) noexcept { csr &= ~_MM_ROUND_MASK; - csr |= ((0x6C >> (2 * (value & 3))) & 3) << 13; + csr |= GuestToHost[value & PPC_ROUND_MASK]; _mm_setcsr(csr); }