[valgrind] Add valgrind-3.8.1-mmxext.patch.
Mark Wielaard
mjw at fedoraproject.org
Thu Sep 5 15:15:44 UTC 2013
commit d3173fc08b0d01883e209038e1e78ec83c53ebff
Author: Mark Wielaard <mjw at redhat.com>
Date: Thu Sep 5 16:56:43 2013 +0200
Add valgrind-3.8.1-mmxext.patch.
valgrind-3.8.1-mmxext.patch | 975 +++++++++++++++++++++++++++++++++++++++++++
valgrind.spec | 5 +
2 files changed, 980 insertions(+), 0 deletions(-)
---
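For context, the detection this adds to coregrind/m_machine.c (see the
second embedded commit below) boils down to a CPUID check. Here is a
minimal standalone sketch of that logic. It is not Valgrind code: it
assumes GCC/Clang's <cpuid.h>, and the variable names are only
illustrative. On AuthenticAMD parts, extended leaf 0x80000001 EDX bit 22
advertises the AMD MMX extensions (the integer-SSE subset); full SSE1
(leaf 1 EDX bit 25) is treated as implying the subset, as the patch does.

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   unsigned int eax, ebx, ecx, edx;
   char vendor[13];

   /* Leaf 0: the vendor string comes back in EBX, EDX, ECX order. */
   if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
      return 1;
   memcpy(vendor + 0, &ebx, 4);
   memcpy(vendor + 4, &edx, 4);
   memcpy(vendor + 8, &ecx, 4);
   vendor[12] = '\0';

   /* Leaf 1: EDX bit 25 is full SSE1. */
   if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
      return 1;
   int have_sse1 = (edx & (1u << 25)) != 0;

   /* Extended leaf 0x80000001: on AMD, EDX bit 22 is the AMD MMX
      extensions. __get_cpuid returns 0 if the leaf is unsupported. */
   int have_mmxext = 0;
   if (strcmp(vendor, "AuthenticAMD") == 0
       && __get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx))
      have_mmxext = (edx & (1u << 22)) != 0;

   /* As in the patch: full SSE1 always implies the mmxext subset. */
   if (have_sse1)
      have_mmxext = 1;

   printf("sse1=%d mmxext=%d\n", have_sse1, have_mmxext);
   return 0;
}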
diff --git a/valgrind-3.8.1-mmxext.patch b/valgrind-3.8.1-mmxext.patch
new file mode 100644
index 0000000..5c88907
--- /dev/null
+++ b/valgrind-3.8.1-mmxext.patch
@@ -0,0 +1,975 @@
+commit a4b7b67db47021c424c18a5729f250016d34df27
+Author: mjw <mjw at 8f6e269a-dfd6-0310-a8e1-e2731360e62c>
+Date: Tue Aug 27 10:19:03 2013 +0000
+
+ Support mmxext (integer sse) subset on i386 (athlon).
+
+ Some processors like the AMD Athlon "Classic" support mmxext,
+ an sse1 subset. This subset is not properly detected by VEX.
+ The subset uses the same encoding as the sse1 instructions.
+
+ The subset is described at:
+ http://support.amd.com/us/Embedded_TechDocs/22466.pdf
+ https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions
+
+ This introduces a new VEX_HWCAPS_X86_MMXEXT that sits between
+ the baseline (0) and VEX_HWCAPS_X86_SSE1. There is also a new
+ x86g_dirtyhelper_CPUID_mmxext that mimics an Athlon "Classic"
+ (Model 2, K75 "Pluto/Orion").
+
+ All mmxext instructions are grouped together in one block.
+
+ git-svn-id: svn://svn.valgrind.org/vex/trunk@2745 8f6e269a-dfd6-0310-a8e1-e2731360e62c
+
+diff --git a/VEX/priv/guest_x86_defs.h b/VEX/priv/guest_x86_defs.h
+index 389e6bb..1a16a0b 100644
+--- a/VEX/priv/guest_x86_defs.h
++++ b/VEX/priv/guest_x86_defs.h
+@@ -144,6 +144,7 @@ extern ULong x86g_dirtyhelper_loadF80le ( UInt );
+ extern void x86g_dirtyhelper_storeF80le ( UInt, ULong );
+
+ extern void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* );
++extern void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* );
+ extern void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* );
+ extern void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* );
+
+diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c
+index 9c26794..e87e89f 100644
+--- a/VEX/priv/guest_x86_helpers.c
++++ b/VEX/priv/guest_x86_helpers.c
+@@ -2207,6 +2207,63 @@ void x86g_dirtyhelper_CPUID_sse0 ( VexGuestX86State* st )
+
+ /* CALLED FROM GENERATED CODE */
+ /* DIRTY HELPER (modifies guest state) */
++/* Claim to be an Athlon "Classic" (Model 2, K75 "Pluto/Orion"). */
++/* But without 3DNow support (weird, but we really don't support it). */
++void x86g_dirtyhelper_CPUID_mmxext ( VexGuestX86State* st )
++{
++ switch (st->guest_EAX) {
++ /* vendor ID */
++ case 0:
++ st->guest_EAX = 0x1;
++ st->guest_EBX = 0x68747541;
++ st->guest_ECX = 0x444d4163;
++ st->guest_EDX = 0x69746e65;
++ break;
++ /* feature bits */
++ case 1:
++ st->guest_EAX = 0x621;
++ st->guest_EBX = 0x0;
++ st->guest_ECX = 0x0;
++ st->guest_EDX = 0x183f9ff;
++ break;
++ /* Highest Extended Function Supported (0x80000004 brand string) */
++ case 0x80000000:
++ st->guest_EAX = 0x80000004;
++ st->guest_EBX = 0x68747541;
++ st->guest_ECX = 0x444d4163;
++ st->guest_EDX = 0x69746e65;
++ break;
++ /* Extended Processor Info and Feature Bits */
++ case 0x80000001:
++ st->guest_EAX = 0x721;
++ st->guest_EBX = 0x0;
++ st->guest_ECX = 0x0;
++ st->guest_EDX = 0x1c3f9ff; /* Note no 3DNow. */
++ break;
++ /* Processor Brand String "AMD Athlon(tm) Processor" */
++ case 0x80000002:
++ st->guest_EAX = 0x20444d41;
++ st->guest_EBX = 0x6c687441;
++ st->guest_ECX = 0x74286e6f;
++ st->guest_EDX = 0x5020296d;
++ break;
++ case 0x80000003:
++ st->guest_EAX = 0x65636f72;
++ st->guest_EBX = 0x726f7373;
++ st->guest_ECX = 0x0;
++ st->guest_EDX = 0x0;
++ break;
++ default:
++ st->guest_EAX = 0x0;
++ st->guest_EBX = 0x0;
++ st->guest_ECX = 0x0;
++ st->guest_EDX = 0x0;
++ break;
++ }
++}
++
++/* CALLED FROM GENERATED CODE */
++/* DIRTY HELPER (modifies guest state) */
+ /* Claim to be the following SSE1-capable CPU:
+ vendor_id : GenuineIntel
+ cpu family : 6
+diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c
+index 90499b0..e98f19c 100644
+--- a/VEX/priv/guest_x86_toIR.c
++++ b/VEX/priv/guest_x86_toIR.c
+@@ -8318,7 +8318,18 @@ DisResult disInstr_X86_WRK (
+ guest subarchitecture. */
+ if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
+ goto after_sse_decoders;
+-
++
++ /* With mmxext, only some extended MMX instructions are recognized.
++ The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
++ PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
++ PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
++
++ http://support.amd.com/us/Embedded_TechDocs/22466.pdf
++ https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
++
++ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
++ goto mmxext;
++
+ /* Otherwise we must be doing sse1 or sse2, so we can at least try
+ for SSE1 here. */
+
+@@ -8627,6 +8638,11 @@ DisResult disInstr_X86_WRK (
+ goto decode_success;
+ }
+
++
++ /* mmxext sse1 subset starts here. mmxext-only arches will parse
++ only this subset of the sse1 instructions. */
++ mmxext:
++
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F7 = MASKMOVQ -- 8x8 masked store */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
+@@ -8637,203 +8653,6 @@ DisResult disInstr_X86_WRK (
+ goto decode_success;
+ }
+
+- /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
+- delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
+- goto decode_success;
+- }
+-
+- /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
+- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
+- vassert(sz == 4);
+- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
+- goto decode_success;
+- }
+-
+- /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
+- delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
+- goto decode_success;
+- }
+-
+- /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
+- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
+- vassert(sz == 4);
+- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
+- goto decode_success;
+- }
+-
+- /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
+- /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
+- if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
+- modrm = getIByte(delta+2);
+- if (epartIsReg(modrm)) {
+- putXMMReg( gregOfRM(modrm),
+- getXMMReg( eregOfRM(modrm) ));
+- DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+- nameXMMReg(gregOfRM(modrm)));
+- delta += 2+1;
+- } else {
+- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+- if (insn[1] == 0x28/*movaps*/)
+- gen_SEGV_if_not_16_aligned( addr );
+- putXMMReg( gregOfRM(modrm),
+- loadLE(Ity_V128, mkexpr(addr)) );
+- DIP("mov[ua]ps %s,%s\n", dis_buf,
+- nameXMMReg(gregOfRM(modrm)));
+- delta += 2+alen;
+- }
+- goto decode_success;
+- }
+-
+- /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
+- /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
+- if (sz == 4 && insn[0] == 0x0F
+- && (insn[1] == 0x29 || insn[1] == 0x11)) {
+- modrm = getIByte(delta+2);
+- if (epartIsReg(modrm)) {
+- /* fall through; awaiting test case */
+- } else {
+- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+- if (insn[1] == 0x29/*movaps*/)
+- gen_SEGV_if_not_16_aligned( addr );
+- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+- DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+- dis_buf );
+- delta += 2+alen;
+- goto decode_success;
+- }
+- }
+-
+- /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
+- /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
+- modrm = getIByte(delta+2);
+- if (epartIsReg(modrm)) {
+- delta += 2+1;
+- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+- getXMMRegLane64( eregOfRM(modrm), 0 ) );
+- DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+- nameXMMReg(gregOfRM(modrm)));
+- } else {
+- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+- delta += 2+alen;
+- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+- loadLE(Ity_I64, mkexpr(addr)) );
+- DIP("movhps %s,%s\n", dis_buf,
+- nameXMMReg( gregOfRM(modrm) ));
+- }
+- goto decode_success;
+- }
+-
+- /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
+- if (!epartIsReg(insn[2])) {
+- delta += 2;
+- addr = disAMode ( &alen, sorb, delta, dis_buf );
+- delta += alen;
+- storeLE( mkexpr(addr),
+- getXMMRegLane64( gregOfRM(insn[2]),
+- 1/*upper lane*/ ) );
+- DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
+- dis_buf);
+- goto decode_success;
+- }
+- /* else fall through */
+- }
+-
+- /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
+- /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
+- modrm = getIByte(delta+2);
+- if (epartIsReg(modrm)) {
+- delta += 2+1;
+- putXMMRegLane64( gregOfRM(modrm),
+- 0/*lower lane*/,
+- getXMMRegLane64( eregOfRM(modrm), 1 ));
+- DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
+- nameXMMReg(gregOfRM(modrm)));
+- } else {
+- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+- delta += 2+alen;
+- putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
+- loadLE(Ity_I64, mkexpr(addr)) );
+- DIP("movlps %s, %s\n",
+- dis_buf, nameXMMReg( gregOfRM(modrm) ));
+- }
+- goto decode_success;
+- }
+-
+- /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
+- if (!epartIsReg(insn[2])) {
+- delta += 2;
+- addr = disAMode ( &alen, sorb, delta, dis_buf );
+- delta += alen;
+- storeLE( mkexpr(addr),
+- getXMMRegLane64( gregOfRM(insn[2]),
+- 0/*lower lane*/ ) );
+- DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
+- dis_buf);
+- goto decode_success;
+- }
+- /* else fall through */
+- }
+-
+- /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
+- to 4 lowest bits of ireg(G) */
+- if (insn[0] == 0x0F && insn[1] == 0x50) {
+- modrm = getIByte(delta+2);
+- if (sz == 4 && epartIsReg(modrm)) {
+- Int src;
+- t0 = newTemp(Ity_I32);
+- t1 = newTemp(Ity_I32);
+- t2 = newTemp(Ity_I32);
+- t3 = newTemp(Ity_I32);
+- delta += 2+1;
+- src = eregOfRM(modrm);
+- assign( t0, binop( Iop_And32,
+- binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
+- mkU32(1) ));
+- assign( t1, binop( Iop_And32,
+- binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
+- mkU32(2) ));
+- assign( t2, binop( Iop_And32,
+- binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
+- mkU32(4) ));
+- assign( t3, binop( Iop_And32,
+- binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
+- mkU32(8) ));
+- putIReg(4, gregOfRM(modrm),
+- binop(Iop_Or32,
+- binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
+- binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
+- )
+- );
+- DIP("movmskps %s,%s\n", nameXMMReg(src),
+- nameIReg(4, gregOfRM(modrm)));
+- goto decode_success;
+- }
+- /* else fall through */
+- }
+-
+- /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
+- /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
+- if (insn[0] == 0x0F && insn[1] == 0x2B) {
+- modrm = getIByte(delta+2);
+- if (!epartIsReg(modrm)) {
+- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+- gen_SEGV_if_not_16_aligned( addr );
+- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+- DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
+- dis_buf,
+- nameXMMReg(gregOfRM(modrm)));
+- delta += 2+alen;
+- goto decode_success;
+- }
+- /* else fall through */
+- }
+-
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
+ Intel manual does not say anything about the usual business of
+@@ -8854,70 +8673,6 @@ DisResult disInstr_X86_WRK (
+ /* else fall through */
+ }
+
+- /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
+- (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
+- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
+- vassert(sz == 4);
+- modrm = getIByte(delta+3);
+- if (epartIsReg(modrm)) {
+- putXMMRegLane32( gregOfRM(modrm), 0,
+- getXMMRegLane32( eregOfRM(modrm), 0 ));
+- DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+- nameXMMReg(gregOfRM(modrm)));
+- delta += 3+1;
+- } else {
+- addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+- /* zero bits 127:64 */
+- putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
+- /* zero bits 63:32 */
+- putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
+- /* write bits 31:0 */
+- putXMMRegLane32( gregOfRM(modrm), 0,
+- loadLE(Ity_I32, mkexpr(addr)) );
+- DIP("movss %s,%s\n", dis_buf,
+- nameXMMReg(gregOfRM(modrm)));
+- delta += 3+alen;
+- }
+- goto decode_success;
+- }
+-
+- /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
+- or lo 1/4 xmm). */
+- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
+- vassert(sz == 4);
+- modrm = getIByte(delta+3);
+- if (epartIsReg(modrm)) {
+- /* fall through, we don't yet have a test case */
+- } else {
+- addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+- storeLE( mkexpr(addr),
+- getXMMRegLane32(gregOfRM(modrm), 0) );
+- DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+- dis_buf);
+- delta += 3+alen;
+- goto decode_success;
+- }
+- }
+-
+- /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
+- delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
+- goto decode_success;
+- }
+-
+- /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
+- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
+- vassert(sz == 4);
+- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
+- goto decode_success;
+- }
+-
+- /* 0F 56 = ORPS -- G = G and E */
+- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
+- delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
+- goto decode_success;
+- }
+-
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
+@@ -9173,6 +8928,284 @@ DisResult disInstr_X86_WRK (
+ goto decode_success;
+ }
+
++ /* 0F AE /7 = SFENCE -- flush pending operations to memory */
++ if (insn[0] == 0x0F && insn[1] == 0xAE
++ && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
++ vassert(sz == 4);
++ delta += 3;
++ /* Insert a memory fence. It's sometimes important that these
++ are carried through to the generated code. */
++ stmt( IRStmt_MBE(Imbe_Fence) );
++ DIP("sfence\n");
++ goto decode_success;
++ }
++
++ /* End of mmxext sse1 subset. No more sse parsing for mmxext-only arches. */
++ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
++ goto after_sse_decoders;
++
++
++ /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
++ goto decode_success;
++ }
++
++ /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
++ vassert(sz == 4);
++ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
++ goto decode_success;
++ }
++
++ /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
++ goto decode_success;
++ }
++
++ /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
++ vassert(sz == 4);
++ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
++ goto decode_success;
++ }
++
++ /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
++ /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
++ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
++ modrm = getIByte(delta+2);
++ if (epartIsReg(modrm)) {
++ putXMMReg( gregOfRM(modrm),
++ getXMMReg( eregOfRM(modrm) ));
++ DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
++ nameXMMReg(gregOfRM(modrm)));
++ delta += 2+1;
++ } else {
++ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
++ if (insn[1] == 0x28/*movaps*/)
++ gen_SEGV_if_not_16_aligned( addr );
++ putXMMReg( gregOfRM(modrm),
++ loadLE(Ity_V128, mkexpr(addr)) );
++ DIP("mov[ua]ps %s,%s\n", dis_buf,
++ nameXMMReg(gregOfRM(modrm)));
++ delta += 2+alen;
++ }
++ goto decode_success;
++ }
++
++ /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
++ /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
++ if (sz == 4 && insn[0] == 0x0F
++ && (insn[1] == 0x29 || insn[1] == 0x11)) {
++ modrm = getIByte(delta+2);
++ if (epartIsReg(modrm)) {
++ /* fall through; awaiting test case */
++ } else {
++ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
++ if (insn[1] == 0x29/*movaps*/)
++ gen_SEGV_if_not_16_aligned( addr );
++ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
++ DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
++ dis_buf );
++ delta += 2+alen;
++ goto decode_success;
++ }
++ }
++
++ /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
++ /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
++ modrm = getIByte(delta+2);
++ if (epartIsReg(modrm)) {
++ delta += 2+1;
++ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
++ getXMMRegLane64( eregOfRM(modrm), 0 ) );
++ DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
++ nameXMMReg(gregOfRM(modrm)));
++ } else {
++ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
++ delta += 2+alen;
++ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
++ loadLE(Ity_I64, mkexpr(addr)) );
++ DIP("movhps %s,%s\n", dis_buf,
++ nameXMMReg( gregOfRM(modrm) ));
++ }
++ goto decode_success;
++ }
++
++ /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
++ if (!epartIsReg(insn[2])) {
++ delta += 2;
++ addr = disAMode ( &alen, sorb, delta, dis_buf );
++ delta += alen;
++ storeLE( mkexpr(addr),
++ getXMMRegLane64( gregOfRM(insn[2]),
++ 1/*upper lane*/ ) );
++ DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
++ dis_buf);
++ goto decode_success;
++ }
++ /* else fall through */
++ }
++
++ /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
++ /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
++ modrm = getIByte(delta+2);
++ if (epartIsReg(modrm)) {
++ delta += 2+1;
++ putXMMRegLane64( gregOfRM(modrm),
++ 0/*lower lane*/,
++ getXMMRegLane64( eregOfRM(modrm), 1 ));
++ DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
++ nameXMMReg(gregOfRM(modrm)));
++ } else {
++ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
++ delta += 2+alen;
++ putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
++ loadLE(Ity_I64, mkexpr(addr)) );
++ DIP("movlps %s, %s\n",
++ dis_buf, nameXMMReg( gregOfRM(modrm) ));
++ }
++ goto decode_success;
++ }
++
++ /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
++ if (!epartIsReg(insn[2])) {
++ delta += 2;
++ addr = disAMode ( &alen, sorb, delta, dis_buf );
++ delta += alen;
++ storeLE( mkexpr(addr),
++ getXMMRegLane64( gregOfRM(insn[2]),
++ 0/*lower lane*/ ) );
++ DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
++ dis_buf);
++ goto decode_success;
++ }
++ /* else fall through */
++ }
++
++ /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
++ to 4 lowest bits of ireg(G) */
++ if (insn[0] == 0x0F && insn[1] == 0x50) {
++ modrm = getIByte(delta+2);
++ if (sz == 4 && epartIsReg(modrm)) {
++ Int src;
++ t0 = newTemp(Ity_I32);
++ t1 = newTemp(Ity_I32);
++ t2 = newTemp(Ity_I32);
++ t3 = newTemp(Ity_I32);
++ delta += 2+1;
++ src = eregOfRM(modrm);
++ assign( t0, binop( Iop_And32,
++ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
++ mkU32(1) ));
++ assign( t1, binop( Iop_And32,
++ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
++ mkU32(2) ));
++ assign( t2, binop( Iop_And32,
++ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
++ mkU32(4) ));
++ assign( t3, binop( Iop_And32,
++ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
++ mkU32(8) ));
++ putIReg(4, gregOfRM(modrm),
++ binop(Iop_Or32,
++ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
++ binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
++ )
++ );
++ DIP("movmskps %s,%s\n", nameXMMReg(src),
++ nameIReg(4, gregOfRM(modrm)));
++ goto decode_success;
++ }
++ /* else fall through */
++ }
++
++ /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
++ /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
++ if (insn[0] == 0x0F && insn[1] == 0x2B) {
++ modrm = getIByte(delta+2);
++ if (!epartIsReg(modrm)) {
++ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
++ gen_SEGV_if_not_16_aligned( addr );
++ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
++ DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
++ dis_buf,
++ nameXMMReg(gregOfRM(modrm)));
++ delta += 2+alen;
++ goto decode_success;
++ }
++ /* else fall through */
++ }
++
++ /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
++ (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
++ vassert(sz == 4);
++ modrm = getIByte(delta+3);
++ if (epartIsReg(modrm)) {
++ putXMMRegLane32( gregOfRM(modrm), 0,
++ getXMMRegLane32( eregOfRM(modrm), 0 ));
++ DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
++ nameXMMReg(gregOfRM(modrm)));
++ delta += 3+1;
++ } else {
++ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
++ /* zero bits 127:64 */
++ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
++ /* zero bits 63:32 */
++ putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
++ /* write bits 31:0 */
++ putXMMRegLane32( gregOfRM(modrm), 0,
++ loadLE(Ity_I32, mkexpr(addr)) );
++ DIP("movss %s,%s\n", dis_buf,
++ nameXMMReg(gregOfRM(modrm)));
++ delta += 3+alen;
++ }
++ goto decode_success;
++ }
++
++ /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
++ or lo 1/4 xmm). */
++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
++ vassert(sz == 4);
++ modrm = getIByte(delta+3);
++ if (epartIsReg(modrm)) {
++ /* fall through, we don't yet have a test case */
++ } else {
++ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
++ storeLE( mkexpr(addr),
++ getXMMRegLane32(gregOfRM(modrm), 0) );
++ DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
++ dis_buf);
++ delta += 3+alen;
++ goto decode_success;
++ }
++ }
++
++ /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
++ goto decode_success;
++ }
++
++ /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
++ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
++ vassert(sz == 4);
++ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
++ goto decode_success;
++ }
++
++ /* 0F 56 = ORPS -- G = G or E */
++ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
++ delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
++ goto decode_success;
++ }
++
+ /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
+ if (insn[0] == 0x0F && insn[1] == 0x53) {
+ vassert(sz == 4);
+@@ -9205,18 +9238,6 @@ DisResult disInstr_X86_WRK (
+ goto decode_success;
+ }
+
+- /* 0F AE /7 = SFENCE -- flush pending operations to memory */
+- if (insn[0] == 0x0F && insn[1] == 0xAE
+- && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
+- vassert(sz == 4);
+- delta += 3;
+- /* Insert a memory fence. It's sometimes important that these
+- are carried through to the generated code. */
+- stmt( IRStmt_MBE(Imbe_Fence) );
+- DIP("sfence\n");
+- goto decode_success;
+- }
+-
+ /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
+ Int select;
+@@ -14674,6 +14695,11 @@ DisResult disInstr_X86_WRK (
+ fAddr = &x86g_dirtyhelper_CPUID_sse1;
+ }
+ else
++ if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
++ fName = "x86g_dirtyhelper_CPUID_mmxext";
++ fAddr = &x86g_dirtyhelper_CPUID_mmxext;
++ }
++ else
+ if (archinfo->hwcaps == 0/*no SSE*/) {
+ fName = "x86g_dirtyhelper_CPUID_sse0";
+ fAddr = &x86g_dirtyhelper_CPUID_sse0;
+diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c
+index 21a05a9..693eaa2 100644
+--- a/VEX/priv/host_x86_defs.c
++++ b/VEX/priv/host_x86_defs.c
+@@ -727,7 +727,8 @@ X86Instr* X86Instr_MFence ( UInt hwcaps ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_MFence;
+ i->Xin.MFence.hwcaps = hwcaps;
+- vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
++ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
++ |VEX_HWCAPS_X86_SSE1
+ |VEX_HWCAPS_X86_SSE2
+ |VEX_HWCAPS_X86_SSE3
+ |VEX_HWCAPS_X86_LZCNT)));
+@@ -2695,7 +2696,7 @@ Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
+ goto done;
+ }
+- if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
++ if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
+ /* sfence */
+ *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
+ /* lock addl $0,0(%esp) */
+diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h
+index f810ab4..e03becf 100644
+--- a/VEX/priv/host_x86_defs.h
++++ b/VEX/priv/host_x86_defs.h
+@@ -360,7 +360,7 @@ typedef
+ Xin_Store, /* store 16/8 bit value in memory */
+ Xin_Set32, /* convert condition code to 32-bit value */
+ Xin_Bsfr32, /* 32-bit bsf/bsr */
+- Xin_MFence, /* mem fence (not just sse2, but sse0 and 1 too) */
++ Xin_MFence, /* mem fence (not just sse2, but sse0 and 1/mmxext too) */
+ Xin_ACAS, /* 8/16/32-bit lock;cmpxchg */
+ Xin_DACAS, /* lock;cmpxchg8b (doubleword ACAS, 2 x 32-bit only) */
+
+@@ -508,13 +508,13 @@ typedef
+ HReg src;
+ HReg dst;
+ } Bsfr32;
+- /* Mem fence (not just sse2, but sse0 and 1 too). In short,
+- an insn which flushes all preceding loads and stores as
+- much as possible before continuing. On SSE2 we emit a
+- real "mfence", on SSE1 "sfence ; lock addl $0,0(%esp)" and
+- on SSE0 "lock addl $0,0(%esp)". This insn therefore
+- carries the host's hwcaps so the assembler knows what to
+- emit. */
++ /* Mem fence (not just sse2, but sse0 and sse1/mmxext too).
++ In short, an insn which flushes all preceding loads and
++ stores as much as possible before continuing. On SSE2
++ we emit a real "mfence", on SSE1 or the MMXEXT subset
++ "sfence ; lock addl $0,0(%esp)" and on SSE0
++ "lock addl $0,0(%esp)". This insn therefore carries the
++ host's hwcaps so the assembler knows what to emit. */
+ struct {
+ UInt hwcaps;
+ } MFence;
+diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c
+index 086aefc..90bc563 100644
+--- a/VEX/priv/host_x86_isel.c
++++ b/VEX/priv/host_x86_isel.c
+@@ -3251,7 +3251,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
+ {
+
+ # define REQUIRE_SSE1 \
+- do { if (env->hwcaps == 0/*baseline, no sse*/) \
++ do { if (env->hwcaps == 0/*baseline, no sse*/ \
++ || env->hwcaps == VEX_HWCAPS_X86_MMXEXT /*Integer SSE*/) \
+ goto vec_fail; \
+ } while (0)
+
+@@ -4388,7 +4389,8 @@ HInstrArray* iselSB_X86 ( IRSB* bb,
+ /* sanity ... */
+ vassert(arch_host == VexArchX86);
+ vassert(0 == (hwcaps_host
+- & ~(VEX_HWCAPS_X86_SSE1
++ & ~(VEX_HWCAPS_X86_MMXEXT
++ | VEX_HWCAPS_X86_SSE1
+ | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3
+ | VEX_HWCAPS_X86_LZCNT)));
+diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
+index e425950..5bb762f 100644
+--- a/VEX/priv/main_main.c
++++ b/VEX/priv/main_main.c
+@@ -1086,23 +1086,25 @@
+
+ static HChar* show_hwcaps_x86 ( UInt hwcaps )
+ {
+- /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. */
++ /* Monotonic, LZCNT > SSE3 > SSE2 > SSE1 > MMXEXT > baseline. */
+ switch (hwcaps) {
+ case 0:
+ return "x86-sse0";
+- case VEX_HWCAPS_X86_SSE1:
+- return "x86-sse1";
+- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
+- return "x86-sse1-sse2";
+- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
++ case VEX_HWCAPS_X86_MMXEXT:
++ return "x86-mmxext";
++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1:
++ return "x86-mmxext-sse1";
++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
++ return "x86-mmxext-sse1-sse2";
++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_LZCNT:
+- return "x86-sse1-sse2-lzcnt";
+- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
++ return "x86-mmxext-sse1-sse2-lzcnt";
++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3:
+- return "x86-sse1-sse2-sse3";
+- case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
++ return "x86-mmxext-sse1-sse2-sse3";
++ case VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT:
+- return "x86-sse1-sse2-sse3-lzcnt";
++ return "x86-mmxext-sse1-sse2-sse3-lzcnt";
+ default:
+ return NULL;
+ }
+diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
+index 4b36727..c8b5892 100644
+--- a/VEX/pub/libvex.h
++++ b/VEX/pub/libvex.h
+@@ -71,11 +71,12 @@ typedef
+ combinations. */
+
+ /* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
+- cmpxchg8b. */
+-#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
+-#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
+-#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
+-#define VEX_HWCAPS_X86_LZCNT (1<<4) /* SSE4a LZCNT insn */
++ cmpxchg8b. MMXEXT is a special AMD-only subset of SSE1 (Integer SSE). */
++#define VEX_HWCAPS_X86_MMXEXT (1<<1) /* A subset of SSE1 on early AMD */
++#define VEX_HWCAPS_X86_SSE1 (1<<2) /* SSE1 support (Pentium III) */
++#define VEX_HWCAPS_X86_SSE2 (1<<3) /* SSE2 support (Pentium 4) */
++#define VEX_HWCAPS_X86_SSE3 (1<<4) /* SSE3 support (>= Prescott) */
++#define VEX_HWCAPS_X86_LZCNT (1<<5) /* SSE4a LZCNT insn */
+
+ /* amd64: baseline capability is SSE2, with cmpxchg8b but not
+ cmpxchg16b. */
+commit 4c6f0638553e69b7f70c17a64a8f60114d6f6230
+Author: mjw <mjw at a5019735-40e9-0310-863c-91ae7b9d1cf9>
+Date: Tue Aug 27 10:23:23 2013 +0000
+
+ Support mmxext (integer sse) subset on i386 (athlon). Bug #323713
+
+ Some processors like the AMD Athlon "Classic" support mmxext,
+ an sse1 subset. This subset is not properly detected by VEX.
+ The subset uses the same encoding as the sse1 instructions.
+
+ The subset is described at:
+ http://support.amd.com/us/Embedded_TechDocs/22466.pdf
+ https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions
+
+ Detects the mmxext subset from cpuid information (and enables it
+ when full sse1 is found). Also fixes the prereq of
+ none/tests/x86/insn_mmxext.vgtest so that it also runs when
+ full sse1 (and not just the mmxext subset) is found.
+ It already passed on such configurations. With the VEX patch
+ (r2745) it also passes with just the mmxext subset.
+
+ git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13515 a5019735-40e9-0310-863c-91ae7b9d1cf9
+
+diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
+index 353c05b..2fd5f07 100644
+--- a/coregrind/m_machine.c
++++ b/coregrind/m_machine.c
+@@ -685,7 +685,7 @@
+ LibVEX_default_VexArchInfo(&vai);
+
+ #if defined(VGA_x86)
+- { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
++ { Bool have_sse1, have_sse2, have_cx8, have_lzcnt, have_mmxext;
+ UInt eax, ebx, ecx, edx, max_extended;
+ UChar vstr[13];
+ vstr[0] = 0;
+@@ -722,17 +722,27 @@
+ if (!have_cx8)
+ return False;
+
+- /* Figure out if this is an AMD that can do LZCNT. */
++ /* Figure out if this is an AMD that can do mmxext and/or LZCNT. */
++ have_mmxext = False;
+ have_lzcnt = False;
+ if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
+ && max_extended >= 0x80000001) {
+ VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
+ have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
++
++ /* Some older AMD processors support an sse1 subset (Integer SSE). */
++ have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
+ }
+
+- if (have_sse2 && have_sse1) {
++ /* Intel processors don't define the mmxext extension, but since it
++ is just an sse1 subset, always define it when we have sse1. */
++ if (have_sse1)
++ have_mmxext = True;
++
++ if (have_sse2 && have_sse1 && have_mmxext) {
+ va = VexArchX86;
+- vai.hwcaps = VEX_HWCAPS_X86_SSE1;
++ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
++ vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
+ vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
+ if (have_lzcnt)
+ vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
+@@ -740,13 +750,21 @@
+ return True;
+ }
+
+- if (have_sse1) {
++ if (have_sse1 && have_mmxext) {
+ va = VexArchX86;
+- vai.hwcaps = VEX_HWCAPS_X86_SSE1;
++ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
++ vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
+ VG_(machine_x86_have_mxcsr) = 1;
+ return True;
+ }
+
++ if (have_mmxext) {
++ va = VexArchX86;
++ vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
++ VG_(machine_x86_have_mxcsr) = 0;
++ return True;
++ }
++
+ va = VexArchX86;
+ vai.hwcaps = 0; /*baseline - no sse at all*/
+ VG_(machine_x86_have_mxcsr) = 0;
+diff --git a/none/tests/x86/insn_mmxext.vgtest b/none/tests/x86/insn_mmxext.vgtest
+index ad48b6e..e3627d6 100644
+--- a/none/tests/x86/insn_mmxext.vgtest
++++ b/none/tests/x86/insn_mmxext.vgtest
+@@ -1,3 +1,4 @@
+ prog: ../../../none/tests/x86/insn_mmxext
+-prereq: ../../../tests/x86_amd64_features x86-mmxext
++# mmxext is an old AMD subset of sse1, so either will do.
++prereq: ../../../tests/x86_amd64_features x86-mmxext || ../../../tests/x86_amd64_features x86-sse
+ vgopts: -q
diff --git a/valgrind.spec b/valgrind.spec
index 090dccc..b35f3d8 100644
--- a/valgrind.spec
+++ b/valgrind.spec
@@ -188,6 +188,9 @@ Patch48: valgrind-3.8.1-power-isa-205-deprecation.patch
# KDE#310931 message-security assist instruction extension not implemented
Patch49: valgrind-3.8.1-s390-STFLE.patch
+# KDE#323713 Support mmxext (integer sse) subset on i386 (athlon)
+Patch50: valgrind-3.8.1-mmxext.patch
+
%ifarch x86_64 ppc64
# Ensure glibc{,-devel} is installed for both multilib arches
BuildRequires: /lib/libc.so.6 /usr/lib/libc.so /lib64/libc.so.6 /usr/lib64/libc.so
@@ -337,6 +340,7 @@ touch ./memcheck/tests/linux/getregset.stderr.exp
chmod 755 tests/check_isa-2_07_cap
%patch48 -p1
%patch49 -p1
+%patch50 -p1
# These tests go into an endless loop on ARM
# There is a __sync_add_and_fetch in the testcase.
@@ -501,6 +505,7 @@ echo ===============END TESTING===============
* Thu Sep 05 2013 Mark Wielaard <mjw at redhat.com>
- Fix power_ISA2_05 testcase (valgrind-3.8.1-power-isa-205-deprecation.patch)
- Fix ppc32 make check build (valgrind-3.8.1-initial-power-isa-207.patch)
+- Add valgrind-3.8.1-mmxext.patch
* Wed Aug 21 2013 Mark Wielaard <mjw at redhat.com> - 3.8.1-26
- Allow building against glibc 2.18. (#999169)
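
A closing note on the host_x86_defs.c change above: Xin_MFence now keys
the emitted fence sequence off the new MMXEXT bit instead of SSE1, since
sfence is part of the integer-SSE subset. A rough sketch of the resulting
selection, using the renumbered hwcaps values from the libvex.h hunk (the
helper name is illustrative and buffer bounds checking is omitted):

#include <stdint.h>

/* Hwcaps bits as renumbered by the libvex.h hunk of the patch. */
#define VEX_HWCAPS_X86_MMXEXT (1<<1)
#define VEX_HWCAPS_X86_SSE1   (1<<2)
#define VEX_HWCAPS_X86_SSE2   (1<<3)

/* Emit a memory fence at p and return the next free byte.  SSE2 hosts
   get a real mfence; hosts with at least the mmxext subset get sfence
   followed by a locked add; baseline hosts get only the locked add,
   which acts as a full barrier on any x86. */
static uint8_t* emit_mem_fence(uint8_t* p, unsigned int hwcaps)
{
   if (hwcaps & VEX_HWCAPS_X86_SSE2) {
      *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;          /* mfence */
      return p;
   }
   if (hwcaps & VEX_HWCAPS_X86_MMXEXT) {
      /* SSE1 hosts set this bit too, so they also land here. */
      *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;          /* sfence */
   }
   /* lock addl $0,0(%esp) */
   *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44; *p++ = 0x24;
   *p++ = 0x00; *p++ = 0x00;
   return p;
}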