mirror of
				https://github.com/hedge-dev/XenonRecomp.git
				synced 2025-11-04 06:47:09 +00:00 
			
		
		
		
	Some vector load & store instructions.
This commit is contained in:
		@@ -138,6 +138,7 @@ int main()
 | 
				
			|||||||
            println("PPC_FUNC void sub_{:X}(PPCContext& __restrict ctx, uint8_t* base) {{", fn.base);
 | 
					            println("PPC_FUNC void sub_{:X}(PPCContext& __restrict ctx, uint8_t* base) {{", fn.base);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        println("\t__assume((reinterpret_cast<size_t>(base) & 0xFFFFFFFF) == 0);");
 | 
				
			||||||
        println("\tPPCRegister temp;");
 | 
					        println("\tPPCRegister temp;");
 | 
				
			||||||
        println("\tuint32_t ea;\n");
 | 
					        println("\tuint32_t ea;\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -714,19 +715,46 @@ int main()
 | 
				
			|||||||
                case PPC_INST_LVEWX128:
 | 
					                case PPC_INST_LVEWX128:
 | 
				
			||||||
                case PPC_INST_LVX:
 | 
					                case PPC_INST_LVX:
 | 
				
			||||||
                case PPC_INST_LVX128:
 | 
					                case PPC_INST_LVX128:
 | 
				
			||||||
                    // TODO: endian swap
 | 
					                    // NOTE: for endian swapping, we reverse the whole vector instead of individual elements.
 | 
				
			||||||
                    print("\t_mm_store_ps(ctx.v{}.f32, _mm_load_ps(reinterpret_cast<float*>(base + ", insn.operands[0]);
 | 
					                    // this is accounted for in every instruction (eg. dp3 sums yzw instead of xyz)
 | 
				
			||||||
 | 
					                    print("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + ((", insn.operands[0]);
 | 
				
			||||||
                    if (insn.operands[1] != 0)
 | 
					                    if (insn.operands[1] != 0)
 | 
				
			||||||
                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
					                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
				
			||||||
                    println("ctx.r{}.u32)));", insn.operands[2]);
 | 
					                    println("ctx.r{}.u32) & ~0xF))), _mm_load_si128((__m128i*)VectorMaskL)));", insn.operands[2]);
 | 
				
			||||||
                    break;
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_LVLX:
 | 
					                case PPC_INST_LVLX:
 | 
				
			||||||
                case PPC_INST_LVLX128:
 | 
					                case PPC_INST_LVLX128:
 | 
				
			||||||
 | 
					                    print("\ttemp.u32 = ");
 | 
				
			||||||
 | 
					                    if (insn.operands[1] != 0)
 | 
				
			||||||
 | 
					                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
				
			||||||
 | 
					                    println("ctx.r{}.u32;", insn.operands[2]);
 | 
				
			||||||
 | 
					                    println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + (temp.u32 & ~0xF))), _mm_load_si128((__m128i*)&VectorMaskL[(temp.u32 & 0xF) * 16])));", insn.operands[0]);
 | 
				
			||||||
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_LVRX:
 | 
					                case PPC_INST_LVRX:
 | 
				
			||||||
                case PPC_INST_LVRX128:
 | 
					                case PPC_INST_LVRX128:
 | 
				
			||||||
 | 
					                    print("\ttemp.u32 = ");
 | 
				
			||||||
 | 
					                    if (insn.operands[1] != 0)
 | 
				
			||||||
 | 
					                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
				
			||||||
 | 
					                    println("ctx.r{}.u32;", insn.operands[2]);
 | 
				
			||||||
 | 
					                    println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, temp.u32 & 0xF ? _mm_shuffle_epi8(_mm_load_si128((__m128i*)(base + (temp.u32 & ~0xF))), _mm_load_si128((__m128i*)&VectorMaskR[(temp.u32 & 0xF) * 16])) : _mm_setzero_si128());", insn.operands[0]);
 | 
				
			||||||
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_LVSL:
 | 
					                case PPC_INST_LVSL:
 | 
				
			||||||
 | 
					                    print("\ttemp.u32 = ");
 | 
				
			||||||
 | 
					                    if (insn.operands[1] != 0)
 | 
				
			||||||
 | 
					                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
				
			||||||
 | 
					                    println("ctx.r{}.u32;", insn.operands[2]);
 | 
				
			||||||
 | 
					                    println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_load_si128((__m128i*)&VectorShiftTableL[(temp.u32 & 0xF) * 16]));", insn.operands[0]);
 | 
				
			||||||
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_LVSR:
 | 
					                case PPC_INST_LVSR:
 | 
				
			||||||
 | 
					                    print("\ttemp.u32 = ");
 | 
				
			||||||
 | 
					                    if (insn.operands[1] != 0)
 | 
				
			||||||
 | 
					                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
				
			||||||
 | 
					                    println("ctx.r{}.u32;", insn.operands[2]);
 | 
				
			||||||
 | 
					                    println("\t_mm_store_si128((__m128i*)ctx.v{}.u8, _mm_load_si128((__m128i*)&VectorShiftTableR[(temp.u32 & 0xF) * 16]));", insn.operands[0]);
 | 
				
			||||||
                    break;
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_LWA:
 | 
					                case PPC_INST_LWA:
 | 
				
			||||||
@@ -1115,8 +1143,14 @@ int main()
 | 
				
			|||||||
                case PPC_INST_STVLX128:
 | 
					                case PPC_INST_STVLX128:
 | 
				
			||||||
                case PPC_INST_STVRX:
 | 
					                case PPC_INST_STVRX:
 | 
				
			||||||
                case PPC_INST_STVRX128:
 | 
					                case PPC_INST_STVRX128:
 | 
				
			||||||
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_STVX:
 | 
					                case PPC_INST_STVX:
 | 
				
			||||||
                case PPC_INST_STVX128:
 | 
					                case PPC_INST_STVX128:
 | 
				
			||||||
 | 
					                    print("\t_mm_store_si128((__m128i*)(base + ((");
 | 
				
			||||||
 | 
					                    if (insn.operands[1] != 0)
 | 
				
			||||||
 | 
					                        print("ctx.r{}.u32 + ", insn.operands[1]);
 | 
				
			||||||
 | 
					                    println("ctx.r{}.u32) & ~0xF)), _mm_shuffle_epi8(_mm_load_si128((__m128i*)ctx.v{}.u8), _mm_load_si128((__m128i*)VectorMaskL)));", insn.operands[2], insn.operands[0]);
 | 
				
			||||||
                    break;
 | 
					                    break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                case PPC_INST_STW:
 | 
					                case PPC_INST_STW:
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -11,6 +11,7 @@
 | 
				
			|||||||
#define _byteswap_ulong __builtin_bswap32
 | 
					#define _byteswap_ulong __builtin_bswap32
 | 
				
			||||||
#define _byteswap_uint64 __builtin_bswap64
 | 
					#define _byteswap_uint64 __builtin_bswap64
 | 
				
			||||||
#define isnan __builtin_isnan
 | 
					#define isnan __builtin_isnan
 | 
				
			||||||
 | 
					#define __assume __builtin_assume
 | 
				
			||||||
#define PPC_FUNC __attribute__((weak,noinline))
 | 
					#define PPC_FUNC __attribute__((weak,noinline))
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
#include <intrin.h>
 | 
					#include <intrin.h>
 | 
				
			||||||
@@ -342,6 +343,86 @@ struct PPCContext
 | 
				
			|||||||
    };
 | 
					    };
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline uint8_t VectorMaskL[] =
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
 | 
				
			||||||
 | 
					    0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D, 0x0C,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E, 0x0D,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x0E,
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline uint8_t VectorMaskR[] =
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF, 0xFF,
 | 
				
			||||||
 | 
					    0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0xFF,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline uint8_t VectorShiftTableL[] =
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
 | 
				
			||||||
 | 
					    0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
 | 
				
			||||||
 | 
					    0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
 | 
				
			||||||
 | 
					    0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
 | 
				
			||||||
 | 
					    0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
 | 
				
			||||||
 | 
					    0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05,
 | 
				
			||||||
 | 
					    0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06,
 | 
				
			||||||
 | 
					    0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07,
 | 
				
			||||||
 | 
					    0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
 | 
				
			||||||
 | 
					    0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
 | 
				
			||||||
 | 
					    0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
 | 
				
			||||||
 | 
					    0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B,
 | 
				
			||||||
 | 
					    0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C,
 | 
				
			||||||
 | 
					    0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D,
 | 
				
			||||||
 | 
					    0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E,
 | 
				
			||||||
 | 
					    0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					inline uint8_t VectorShiftTableR[] =
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
 | 
				
			||||||
 | 
					    0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F,
 | 
				
			||||||
 | 
					    0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E,
 | 
				
			||||||
 | 
					    0x1C, 0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D,
 | 
				
			||||||
 | 
					    0x1B, 0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C,
 | 
				
			||||||
 | 
					    0x1A, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B,
 | 
				
			||||||
 | 
					    0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A,
 | 
				
			||||||
 | 
					    0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09,
 | 
				
			||||||
 | 
					    0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
 | 
				
			||||||
 | 
					    0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07,
 | 
				
			||||||
 | 
					    0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06,
 | 
				
			||||||
 | 
					    0x14, 0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05,
 | 
				
			||||||
 | 
					    0x13, 0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
 | 
				
			||||||
 | 
					    0x12, 0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03,
 | 
				
			||||||
 | 
					    0x11, 0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02,
 | 
				
			||||||
 | 
					    0x10, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline __m128i _mm_adds_epu32(__m128i a, __m128i b) 
 | 
					inline __m128i _mm_adds_epu32(__m128i a, __m128i b) 
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    return _mm_add_epi32(_mm_min_epu32(a, _mm_xor_si128(b, _mm_cmpeq_epi32(b, b))), b);
 | 
					    return _mm_add_epi32(_mm_min_epu32(a, _mm_xor_si128(b, _mm_cmpeq_epi32(b, b))), b);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user