From 06aeb62472e38ef17f96d5bfdc0479910e7b55db Mon Sep 17 00:00:00 2001 From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:19:20 +0300 Subject: [PATCH] Add a sample project for compiling the recompiled functions. --- CMakeLists.txt | 1 + PowerRecomp/main.cpp | 34 +++++++++++++++------------------- PowerSample/.gitignore | 1 + PowerSample/CMakeLists.txt | 13 +++++++++++++ PowerSample/main.cpp | 0 PowerUtils/ppc_context.h | 6 +++--- 6 files changed, 33 insertions(+), 22 deletions(-) create mode 100644 PowerSample/.gitignore create mode 100644 PowerSample/CMakeLists.txt create mode 100644 PowerSample/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2da5e35..8594d15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,3 +25,4 @@ project ("PowerRecomp-ALL") add_subdirectory(PowerAnalyse) add_subdirectory(PowerRecomp) add_subdirectory(PowerUtils) +add_subdirectory(PowerSample) diff --git a/PowerRecomp/main.cpp b/PowerRecomp/main.cpp index 5303460..1912e9c 100644 --- a/PowerRecomp/main.cpp +++ b/PowerRecomp/main.cpp @@ -10,8 +10,6 @@ #include #include -#define TEST_FILE "private/default.xex" - static uint64_t computeMask(uint32_t mstart, uint32_t mstop) { mstart &= 0x3F; @@ -20,9 +18,13 @@ static uint64_t computeMask(uint32_t mstart, uint32_t mstop) return mstart <= mstop ? 
value : ~value; } -int main() +// argv 1: xex file path +// argv 2: switches toml file path +// argv 3: output directory path + +int main(int argc, char* argv[]) { - const auto file = LoadFile(TEST_FILE).value(); + const auto file = LoadFile(argv[1]).value(); auto image = Image::ParseImage(file.data(), file.size()).value(); std::println("Loading switch tables..."); @@ -35,7 +37,7 @@ int main() std::unordered_map switchTables; - toml::table toml = toml::parse_file("out/switches.toml"); + toml::table toml = toml::parse_file(argv[2]); for (auto& entry : *toml["switch"].as_array()) { auto& table = *entry.as_table(); @@ -50,14 +52,8 @@ int main() std::println("Analysing functions..."); - uint32_t cxxFrameHandler = std::byteswap(0x831B1C90); - uint32_t cSpecificFrameHandler = std::byteswap(0x8324B3BC); - image.symbols.emplace("__CxxFrameHandler", 0x831B1C90, 0x38, Symbol_Function); - image.symbols.emplace("__C_specific_handler", 0x8324B3BC, 0x38, Symbol_Function); - image.symbols.emplace("__memcpy", 0x831B0ED0, 0x488, Symbol_Function); - image.symbols.emplace("__memset", 0x831B0BA0, 0xA0, Symbol_Function); - image.symbols.emplace("__blkmov", 0x831B1358, 0xA8, Symbol_Function); - image.symbols.emplace(std::format("sub_{:X}", 0x82EF5D78), 0x82EF5D78, 0x3F8, Symbol_Function); + constexpr uint32_t cxxFrameHandler = std::byteswap(0x831B1C90); + constexpr uint32_t cSpecificFrameHandler = std::byteswap(0x8324B3BC); std::vector functions; auto& pdata = *image.Find(".pdata"); @@ -146,7 +142,7 @@ int main() { if (name.empty()) { - name = std::format("out/ppc_recomp.{}.cpp", cppFileIndex); + name = std::format("{}/ppc_recomp.{}.cpp", argv[3], cppFileIndex); ++cppFileIndex; } @@ -165,7 +161,7 @@ int main() for (auto& symbol : image.symbols) println("PPC_FUNC void {}(PPCContext& __restrict ctx, uint8_t* base);", symbol.name); - saveFile("out/ppc_recomp_shared.h"); + saveFile(std::format("{}/ppc_recomp_shared.h", argv[3])); } { @@ -178,12 +174,12 @@ int main() println("\t{{ 0, nullptr 
}}"); println("}};"); - saveFile("out/ppc_func_mapping.cpp"); + saveFile(std::format("{}/ppc_func_mapping.cpp", argv[3])); } for (size_t funcIdx = 0; funcIdx < functions.size(); funcIdx++) { - if ((funcIdx % 1000) == 0) + if ((funcIdx % 100) == 0) { std::println("Recompiling functions... {}%", static_cast<float>(funcIdx) / functions.size() * 100.0f); @@ -1417,7 +1413,7 @@ int main() case PPC_INST_VCFUX: case PPC_INST_VCUXWFP128: - println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepu32_ps(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_set1_ps(ldexpf(1.0f, {}))));", insn.operands[0], insn.operands[1], -int32_t(insn.operands[2])); + println("\t_mm_store_ps(ctx.v{}.f32, _mm_mul_ps(_mm_cvtepu32_ps_(_mm_load_si128((__m128i*)ctx.v{}.u32)), _mm_set1_ps(ldexpf(1.0f, {}))));", insn.operands[0], insn.operands[1], -int32_t(insn.operands[2])); break; case PPC_INST_VCMPBFP128: @@ -1552,7 +1548,7 @@ int main() case PPC_INST_VPERM: case PPC_INST_VPERM128: - println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_perm_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]); + println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_perm_epi8_(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)ctx.v{}.u8)));", insn.operands[0], insn.operands[1], insn.operands[2], insn.operands[3]); break; case PPC_INST_VPERMWI128: diff --git a/PowerSample/.gitignore b/PowerSample/.gitignore new file mode 100644 index 0000000..1d6f231 --- /dev/null +++ b/PowerSample/.gitignore @@ -0,0 +1 @@ +ppc_* \ No newline at end of file diff --git a/PowerSample/CMakeLists.txt b/PowerSample/CMakeLists.txt new file mode 100644 index 0000000..b85914d --- /dev/null +++ b/PowerSample/CMakeLists.txt @@ -0,0 +1,13 @@ +project("PowerSample") + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_COMPILER "clang-cl") +set(CMAKE_C_COMPILER "clang-cl") 
+add_compile_options("-march=x86-64-v3") + +file(GLOB RecompiledFiles *.cpp) +add_library(PowerSample SHARED ${RecompiledFiles}) + +target_precompile_headers(PowerSample PUBLIC "ppc_recomp_shared.h") + +target_link_libraries(PowerSample PUBLIC PowerUtils) diff --git a/PowerSample/main.cpp b/PowerSample/main.cpp new file mode 100644 index 0000000..e69de29 diff --git a/PowerUtils/ppc_context.h b/PowerUtils/ppc_context.h index 80191a6..e69feb2 100644 --- a/PowerUtils/ppc_context.h +++ b/PowerUtils/ppc_context.h @@ -13,7 +13,7 @@ #define isnan __builtin_isnan #define __assume __builtin_assume #define __unreachable() __builtin_unreachable() -#define PPC_FUNC __attribute__((weak,noinline)) +#define PPC_FUNC __attribute__((noinline)) #else #include #define PPC_FUNC __declspec(noinline) @@ -466,7 +466,7 @@ inline __m128i _mm_avg_epi16(__m128i a, __m128i b) return _mm_add_epi16(c, _mm_avg_epu16(_mm_add_epi16(c, a), _mm_add_epi16(c, b))); } -inline __m128 _mm_cvtepu32_ps(__m128i v) +inline __m128 _mm_cvtepu32_ps_(__m128i v) { __m128i v2 = _mm_srli_epi32(v, 1); __m128i v1 = _mm_sub_epi32(v, v2); @@ -475,7 +475,7 @@ inline __m128 _mm_cvtepu32_ps(__m128i v) return _mm_add_ps(v2f, v1f); } -inline __m128i _mm_perm_epi8(__m128i a, __m128i b, __m128i c) +inline __m128i _mm_perm_epi8_(__m128i a, __m128i b, __m128i c) { __m128i d = _mm_set1_epi8(0xF); __m128i e = _mm_sub_epi8(d, _mm_and_si128(c, d));