From a5d63829753be07e845a5546c69720259a5e8bde Mon Sep 17 00:00:00 2001 From: DeaTh-G <55578911+DeaTh-G@users.noreply.github.com> Date: Sun, 3 Nov 2024 21:27:33 +0100 Subject: [PATCH] add remaining altivec instructions --- XenonRecomp/recompiler.cpp | 18 ++++++++++++++++++ XenonUtils/ppc_context.h | 13 +++++++++++++ 2 files changed, 31 insertions(+) diff --git a/XenonRecomp/recompiler.cpp b/XenonRecomp/recompiler.cpp index c4ab869..801d80f 100644 --- a/XenonRecomp/recompiler.cpp +++ b/XenonRecomp/recompiler.cpp @@ -1986,6 +1986,16 @@ bool Recompiler::Recompile( println("_mm_load_ps({}.f32)));", v(insn.operands[1])); break; + case PPC_INST_VCTUXS: + case PPC_INST_VCFPUXWS128: + printSetFlushMode(true); + print("\t_mm_store_si128((__m128i*){}.u32, _mm_vctuxs(", v(insn.operands[0])); + if (insn.operands[2] != 0) + println("_mm_mul_ps(_mm_load_ps({}.f32), _mm_set1_ps({}))));", v(insn.operands[1]), 1u << insn.operands[2]); + else + println("_mm_load_ps({}.f32)));", v(insn.operands[1])); + break; + case PPC_INST_VCFSX: case PPC_INST_VCSXWFP128: { @@ -2325,6 +2335,14 @@ bool Recompiler::Recompile( break; } + case PPC_INST_VRLH: + for (size_t i = 0; i < 8; i++) + { + println("\t{0}.u16[{1}] = ({2}.u16[{1}] << ({3}.u16[{1}] & 0xF)) | ({2}.u16[{1}] >> (16 - ({3}.u16[{1}] & 0xF)));", vTemp(), i, v(insn.operands[1]), v(insn.operands[2])); + } + println("{} = {};", v(insn.operands[0]), vTemp()); + break; + case PPC_INST_VRSQRTEFP: case PPC_INST_VRSQRTEFP128: // TODO: see if we can use rsqrt safely diff --git a/XenonUtils/ppc_context.h b/XenonUtils/ppc_context.h index c1091d1..d7ddbd5 100644 --- a/XenonUtils/ppc_context.h +++ b/XenonUtils/ppc_context.h @@ -645,6 +645,19 @@ inline __m128i _mm_vctsxs(__m128 src1) return _mm_andnot_si128(_mm_castps_si128(xmm2), _mm_castps_si128(dest)); } +inline __m128i _mm_vctuxs(__m128 src1) +{ + __m128 xmm0 = _mm_max_ps(src1, _mm_set1_epi32(0)); + __m128 xmm1 = _mm_cmpge_ps(xmm0, _mm_set1_ps((float)0x80000000)); + __m128 xmm2 = _mm_sub_ps(xmm0, _mm_set1_ps((float)0x80000000)); + xmm0 = _mm_blendv_ps(xmm0, xmm2, xmm1); + __m128i dest = _mm_cvttps_epi32(xmm0); + xmm0 = _mm_cmpeq_epi32(dest, _mm_set1_epi32(INT_MIN)); + xmm1 = _mm_and_si128(xmm1, _mm_set1_epi32(INT_MIN)); + dest = _mm_add_epi32(dest, xmm1); + return _mm_or_si128(dest, xmm0); +} + inline __m128i _mm_vsr(__m128i a, __m128i b) { b = _mm_srli_epi64(_mm_slli_epi64(b, 61), 61);