From 45a490c24593013e027130d66278c67190250293 Mon Sep 17 00:00:00 2001 From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Fri, 20 Sep 2024 20:53:56 +0300 Subject: [PATCH] Even more inaccuracy fixes. --- PowerRecomp/main.cpp | 79 +++++++++++++++++++++---------------- PowerSample/CMakeLists.txt | 7 +++- thirdparty/disasm/ppc-dis.c | 4 +- 3 files changed, 52 insertions(+), 38 deletions(-) diff --git a/PowerRecomp/main.cpp b/PowerRecomp/main.cpp index f4d59c9..ecb96e9 100644 --- a/PowerRecomp/main.cpp +++ b/PowerRecomp/main.cpp @@ -55,6 +55,7 @@ int main(int argc, char* argv[]) constexpr uint32_t cxxFrameHandler = std::byteswap(0x831B1C90); constexpr uint32_t cSpecificFrameHandler = std::byteswap(0x8324B3BC); + constexpr uint32_t yetAnotherFrameHandler = std::byteswap(0x831C8B50); std::vector functions; auto& pdata = *image.Find(".pdata"); @@ -205,7 +206,7 @@ int main(int argc, char* argv[]) continue; } - if (*(uint32_t*)data == cxxFrameHandler || *(uint32_t*)data == cSpecificFrameHandler) + if (*(uint32_t*)data == cxxFrameHandler || *(uint32_t*)data == cSpecificFrameHandler || *(uint32_t*)data == yetAnotherFrameHandler) { data += 8; base += 8; @@ -345,6 +346,7 @@ int main(int argc, char* argv[]) println("\t__assume((reinterpret_cast(base) & 0xFFFFFFFF) == 0);"); println("\tPPCRegister temp;"); + println("\tPPCVRegister vtemp;"); println("\tuint32_t ea;\n"); auto switchTable = switchTables.end(); @@ -364,7 +366,10 @@ int main(int argc, char* argv[]) if (insn.opcode == nullptr) { println("\t// {}", insn.op_str); - std::println("Unable to decode instruction at 0x{:X}", base - 4); +#if 0 + if (*(data - 1) != 0) + std::println("Unable to decode instruction {:X} at {:X}", *(data - 1), base - 4); +#endif } else { @@ -737,7 +742,6 @@ int main(int argc, char* argv[]) println("\tctx.r{}.s64 = ctx.r{}.s32;", insn.operands[0], insn.operands[1]); break; - // TODO: fpu operations require denormal flushing checks case PPC_INST_FABS: println("\tctx.csr.setFlushMode(false);"); println("\tctx.f{}.f64 = fabs(ctx.f{}.f64);", insn.operands[0], insn.operands[1]); @@ -754,7 +758,6 @@ int main(int argc, char* argv[]) break; case PPC_INST_FCFID: - // TODO: rounding mode? println("\tctx.csr.setFlushMode(false);"); println("\tctx.f{}.f64 = ctx.f{}.s64;", insn.operands[0], insn.operands[1]); break; @@ -765,9 +768,8 @@ int main(int argc, char* argv[]) break; case PPC_INST_FCTID: - // TODO: rounding mode? println("\tctx.csr.setFlushMode(false);"); - println("\tctx.f{}.s64 = ctx.f{}.f64;", insn.operands[0], insn.operands[1]); + println("\tctx.f{}.s64 = round(ctx.f{}.f64);", insn.operands[0], insn.operands[1]); break; case PPC_INST_FCTIDZ: @@ -837,7 +839,7 @@ int main(int argc, char* argv[]) case PPC_INST_FNMADDS: println("\tctx.csr.setFlushMode(false);"); - println("\tctx.f{}.f64 = -float(ctx.f{}.f64 * ctx.f{}.f64 + ctx.f{}.f64);", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]); + println("\tctx.f{}.f64 = float(-(ctx.f{}.f64 * ctx.f{}.f64 + ctx.f{}.f64));", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]); break; case PPC_INST_FNMSUB: @@ -847,7 +849,7 @@ int main(int argc, char* argv[]) case PPC_INST_FNMSUBS: println("\tctx.csr.setFlushMode(false);"); - println("\tctx.f{}.f64 = -float(ctx.f{}.f64 * ctx.f{}.f64 - ctx.f{}.f64);", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]); + println("\tctx.f{}.f64 = float(-(ctx.f{}.f64 * ctx.f{}.f64 - ctx.f{}.f64));", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]); break; case PPC_INST_FRES: @@ -866,6 +868,7 @@ int main(int argc, char* argv[]) break; case PPC_INST_FSQRT: + println("\tctx.csr.setFlushMode(false);"); println("\tctx.f{}.f64 = sqrt(ctx.f{}.f64);", insn.operands[0], insn.operands[1]); break; @@ -1392,7 +1395,7 @@ int main(int argc, char* argv[]) println("\ttemp.f32 = ctx.f{}.f64;", insn.operands[0]); print("\tPPC_STORE_U32("); if (insn.operands[2] != 0) - print("ctx.r{}.u32 +", insn.operands[2]); + print("ctx.r{}.u32 + ", insn.operands[2]); println("{}, temp.u32);", int32_t(insn.operands[1])); break; @@ -1469,7 +1472,7 @@ int main(int argc, char* argv[]) println("ctx.r{}.u32;", insn.operands[2]); println("\tfor (size_t i = 0; i < (ea & 0xF); i++)"); - println("\t\tPPC_STORE_U8((ea & ~0xF) + i, ctx.v{}.u8[15 - ((16 - (ea & 0xF)) + i)]);", insn.operands[0]); + println("\t\tPPC_STORE_U8(ea - i - 1, ctx.v{}.u8[i]);", insn.operands[0]); break; case PPC_INST_STVX: @@ -1539,7 +1542,7 @@ int main(int argc, char* argv[]) case PPC_INST_SUBFE: // TODO: do we need to set the carry flag here? - println("\tctx.r{}.s64 = ~ctx.r{}.u64 + ctx.r{}.u64 + ctx.xer.ca;", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\tctx.r{}.u64 = ~ctx.r{}.u64 + ctx.r{}.u64 + ctx.xer.ca;", insn.operands[0], insn.operands[1], insn.operands[2]); break; case PPC_INST_SUBFIC: @@ -1548,7 +1551,7 @@ int main(int argc, char* argv[]) break; case PPC_INST_SYNC: - // no op? + println("\t__faststorefence();"); break; case PPC_INST_TDLGEI: @@ -1571,7 +1574,6 @@ int main(int argc, char* argv[]) // no op break; - // TODO: vector instructions require denormal flushing checks case PPC_INST_VADDFP: case PPC_INST_VADDFP128: println("\tctx.csr.setFlushMode(true);"); @@ -1626,20 +1628,28 @@ int main(int argc, char* argv[]) case PPC_INST_VCTSXS: case PPC_INST_VCFPSXWS128: println("\tctx.csr.setFlushMode(true);"); - println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_vctsxs(_mm_mul_ps(_mm_load_ps(ctx.v{}.f32), _mm_set1_ps(exp2f({})))));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.s32, _mm_vctsxs(_mm_mul_ps(_mm_load_ps(ctx.v{}.f32), _mm_set1_ps({}))));", insn.operands[0], insn.operands[1], 1u << insn.operands[2]); break; case PPC_INST_VCFSX: case PPC_INST_VCSXWFP128: + { + const float v = ldexp(1.0f, -int32_t(insn.operands[2])); + println("\tctx.csr.setFlushMode(true);"); - println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepi32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_set1_ps(ldexpf(1.0f, {}))));", insn.operands[0], insn.operands[1], -int32_t(insn.operands[2])); + println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepi32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_castps_si128(_mm_set1_epi32(int(0x{:X})))));", insn.operands[0], insn.operands[1], *reinterpret_cast(&v)); break; + } case PPC_INST_VCFUX: case PPC_INST_VCUXWFP128: + { + const float v = ldexp(1.0f, -int32_t(insn.operands[2])); + println("\tctx.csr.setFlushMode(true);"); - println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepu32_ps_(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_set1_ps(ldexpf(1.0f, {}))));", insn.operands[0], insn.operands[1], -int32_t(insn.operands[2])); + println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepu32_ps_(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_castps_si128(_mm_set1_epi32(int(0x{:X})))));", insn.operands[0], insn.operands[1], *reinterpret_cast(&v)); break; + } case PPC_INST_VCMPBFP128: println("\t__debugbreak();"); @@ -1726,29 +1736,29 @@ int main(int argc, char* argv[]) break; case PPC_INST_VMRGHB: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_unpackhi_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VMRGHH: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u16, _mm_unpackhi_epi16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u16, _mm_unpackhi_epi16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VMRGHW: case PPC_INST_VMRGHW128: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_unpackhi_epi32(_mm_load_si128((__m128i*)ctx.v{}.u32), _mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_unpackhi_epi32(_mm_load_si128((__m128i*)ctx.v{}.u32), _mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VMRGLB: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_unpacklo_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_unpacklo_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VMRGLH: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u16, _mm_unpacklo_epi16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u16, _mm_unpacklo_epi16(_mm_load_si128((__m128i*)ctx.v{}.u16), _mm_load_si128((__m128i*)ctx.v{}.u16)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VMRGLW: case PPC_INST_VMRGLW128: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_unpacklo_epi32(_mm_load_si128((__m128i*)ctx.v{}.u32), _mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_unpacklo_epi32(_mm_load_si128((__m128i*)ctx.v{}.u32), _mm_load_si128((__m128i*)ctx.v{}.u32)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VMSUM3FP128: @@ -1808,7 +1818,7 @@ int main(int argc, char* argv[]) for (size_t i = 0; i < 4; i++) { constexpr size_t indices[] = { 3, 0, 1, 2 }; - println("\ttemp.u32 {}= ctx.v{}.u8[{}] << {};", i == 0 ? "" : "|", insn.operands[1], i * 4, indices[i] * 8); + println("\ttemp.u32 {}= uint32_t(ctx.v{}.u8[{}]) << {};", i == 0 ? "" : "|", insn.operands[1], i * 4, indices[i] * 8); } println("\tctx.v{}.u32[3] = temp.u32;", insn.operands[0]); break; @@ -1820,7 +1830,7 @@ int main(int argc, char* argv[]) break; case PPC_INST_VPKSHUS: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[1], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_packus_epi16(_mm_load_si128((__m128i*)ctx.v{}.s16), _mm_load_si128((__m128i*)ctx.v{}.s16)));", insn.operands[0], insn.operands[2], insn.operands[1]); break; case PPC_INST_VREFP: @@ -1901,11 +1911,8 @@ int main(int argc, char* argv[]) break; case PPC_INST_VSPLTISW: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(int(0x{:X})));", insn.operands[0], insn.operands[1]); - break; - case PPC_INST_VSPLTISW128: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(int(0x{:X})));", insn.operands[0], insn.operands[2]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u32, _mm_set1_epi32(int(0x{:X})));", insn.operands[0], insn.operands[1]); break; case PPC_INST_VSPLTW: @@ -1970,21 +1977,21 @@ int main(int argc, char* argv[]) for (size_t i = 0; i < 4; i++) { constexpr size_t indices[] = { 3, 0, 1, 2 }; - println("\ttemp.f32 = 1.0f;"); - println("\ttemp.u32 |= ctx.v{}.u8[{}];", insn.operands[1], indices[i]); - println("\tctx.v{}.f32[{}] = temp.f32;", insn.operands[0], i); + println("\tvtemp.u32[{}] = ctx.v{}.u8[{}] | 0x3F800000;", i, insn.operands[1], indices[i]); } + println("\tctx.v{} = vtemp;", insn.operands[0]); break; case 1: // 2 shorts for (size_t i = 0; i < 2; i++) { println("\ttemp.f32 = 3.0f;"); - println("\ttemp.s32 += ctx.v{}.s16[{}];", insn.operands[1], i); // TODO: not sure about the indexing here - println("\tctx.v{}.f32[{}] = temp.f32;", insn.operands[0], 3 - i); + //println("\ttemp.s32 += ctx.v{}.s16[{}];", insn.operands[1], i); // TODO: not sure about the indexing here + println("\tvtemp.f32[{}] = temp.f32;", 3 - i); } - println("\tctx.v{}.f32[1] = 0.0f;", insn.operands[0]); - println("\tctx.v{}.f32[0] = 1.0f;", insn.operands[0]); + println("\tvtemp.f32[1] = 0.0f;"); + println("\tvtemp.f32[0] = 1.0f;"); + println("\tctx.v{} = vtemp;", insn.operands[0]); break; default: @@ -2035,12 +2042,14 @@ int main(int argc, char* argv[]) break; } +#if 0 if (strchr(insn.opcode->name, '.')) { int lastLine = out.find_last_of('\n', out.size() - 2); if (out.find("ctx.cr", lastLine + 1) == std::string::npos) std::println("Instruction at {:X} has RC bit enabled but no comparison was generated", base - 4); } +#endif } } diff --git a/PowerSample/CMakeLists.txt b/PowerSample/CMakeLists.txt index 89434f3..c760839 100644 --- a/PowerSample/CMakeLists.txt +++ b/PowerSample/CMakeLists.txt @@ -3,7 +3,12 @@ project("PowerSample") set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_COMPILER "clang-cl") set(CMAKE_C_COMPILER "clang-cl") -add_compile_options("/clang:-march=x86-64-v3") +add_compile_options( + "/clang:-march=x86-64-v3" + "/clang:-Wall" + "/clang:-Wno-unused-label" + "/clang:-Wno-unused-variable" + "/clang:-Wno-c++23-extensions") file(GLOB RecompiledFiles *.cpp) add_library(PowerSample ${RecompiledFiles}) diff --git a/thirdparty/disasm/ppc-dis.c b/thirdparty/disasm/ppc-dis.c index 2d6b3bc..b13bca8 100644 --- a/thirdparty/disasm/ppc-dis.c +++ b/thirdparty/disasm/ppc-dis.c @@ -832,7 +832,7 @@ const struct powerpc_operand powerpc_operands[] = /* The VC128 field in a VA, VX, VXR or X form instruction. */ #define VC128 VB128 + 1 #define VC128_MASK (0x1f << 21) - { 3, 6, NULL, NULL, PPC_OPERAND_VR }, + { 7, 6, NULL, NULL, PPC_OPERAND_VR }, /* The VPERM field in a VPERM128 form instruction. */ #define VPERM128 VC128 + 1 @@ -2521,7 +2521,7 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "vlogefp128", VX128_3(6, 1776), VX128_3_MASK, PPCVEC128, { VD128, VB128 }, PPC_INST_VLOGEFP128 }, { "vrlimi128", VX128_4(6, 1808), VX128_4_MASK, PPCVEC128, { VD128, VB128, UIMM, VD3D2 }, PPC_INST_VRLIMI128 }, { "vspltw128", VX128_3(6, 1840), VX128_3_MASK, PPCVEC128, { VD128, VB128, UIMM }, PPC_INST_VSPLTW128 }, -{ "vspltisw128", VX128_3(6, 1904), VX128_3_MASK, PPCVEC128, { VD128, VB128, SIMM }, PPC_INST_VSPLTISW128 }, +{ "vspltisw128", VX128_3(6, 1904), VX128_3_MASK, PPCVEC128, { VD128, SIMM }, PPC_INST_VSPLTISW128 }, { "vupkd3d128", VX128_3(6, 2032), VX128_3_MASK, PPCVEC128, { VD128, VB128, UIMM }, PPC_INST_VUPKD3D128 }, { "vcmpeqfp128", VX128(6, 0), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VCMPEQFP128 }, { "vcmpeqfp128.", VX128(6, 64), VX128_MASK, PPCVEC128, { VD128, VA128, VB128 }, PPC_INST_VCMPEQFP128 },