diff options
-rw-r--r-- | tools/tests/x86_emulator/test_x86_emulator.c | 187 | ||||
-rw-r--r-- | xen/arch/x86/hvm/emulate.c | 15 | ||||
-rw-r--r-- | xen/arch/x86/x86_emulate/x86_emulate.c | 231 | ||||
-rw-r--r-- | xen/arch/x86/x86_emulate/x86_emulate.h | 4 | ||||
-rw-r--r-- | xen/include/asm-x86/cpufeature.h | 2 |
5 files changed, 403 insertions, 36 deletions
diff --git a/tools/tests/x86_emulator/test_x86_emulator.c b/tools/tests/x86_emulator/test_x86_emulator.c index 1f5722baaa..bc66c97d2d 100644 --- a/tools/tests/x86_emulator/test_x86_emulator.c +++ b/tools/tests/x86_emulator/test_x86_emulator.c @@ -1,3 +1,5 @@ +#include <errno.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -53,11 +55,84 @@ static int cmpxchg( return X86EMUL_OKAY; } +static int cpuid( + unsigned int *eax, + unsigned int *ebx, + unsigned int *ecx, + unsigned int *edx, + struct x86_emulate_ctxt *ctxt) +{ + asm ("cpuid" : "+a" (*eax), "+c" (*ecx), "=d" (*edx), "=b" (*ebx)); + return X86EMUL_OKAY; +} + +#define cpu_has_mmx ({ \ + unsigned int eax = 1, ecx = 0, edx; \ + cpuid(&eax, &ecx, &ecx, &edx, NULL); \ + (edx & (1U << 23)) != 0; \ +}) + +#define cpu_has_sse ({ \ + unsigned int eax = 1, ecx = 0, edx; \ + cpuid(&eax, &ecx, &ecx, &edx, NULL); \ + (edx & (1U << 25)) != 0; \ +}) + +#define cpu_has_sse2 ({ \ + unsigned int eax = 1, ecx = 0, edx; \ + cpuid(&eax, &ecx, &ecx, &edx, NULL); \ + (edx & (1U << 26)) != 0; \ +}) + +static inline uint64_t xgetbv(uint32_t xcr) +{ + uint64_t res; + + asm ( ".byte 0x0f, 0x01, 0xd0" : "=A" (res) : "c" (xcr) ); + + return res; +} + +#define cpu_has_avx ({ \ + unsigned int eax = 1, ecx = 0, edx; \ + cpuid(&eax, &edx, &ecx, &edx, NULL); \ + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ + ecx = 0; \ + (ecx & (1U << 28)) != 0; \ +}) + +int get_fpu( + void (*exception_callback)(void *, struct cpu_user_regs *), + void *exception_callback_arg, + enum x86_emulate_fpu_type type, + struct x86_emulate_ctxt *ctxt) +{ + switch ( type ) + { + case X86EMUL_FPU_fpu: + break; + case X86EMUL_FPU_ymm: + if ( cpu_has_avx ) + break; + case X86EMUL_FPU_xmm: + if ( cpu_has_sse ) + break; + case X86EMUL_FPU_mmx: + if ( cpu_has_mmx ) + break; + default: + return X86EMUL_UNHANDLEABLE; + } + return X86EMUL_OKAY; +} + static struct x86_emulate_ops emulops = { .read = read, .insn_fetch = read, .write = write, .cmpxchg = cmpxchg, + .cpuid = cpuid, + .get_fpu = get_fpu, }; int main(int argc, char **argv) @@ -66,6 +141,8 @@ int main(int argc, char **argv) struct cpu_user_regs regs; char *instr; unsigned int *res, i, j; + unsigned long sp; + bool stack_exec; int rc; #ifndef __x86_64__ unsigned int bcdres_native, bcdres_emul; @@ -85,6 +162,16 @@ int main(int argc, char **argv) } instr = (char *)res + 0x100; +#ifdef __x86_64__ + asm ("movq %%rsp, %0" : "=g" (sp)); +#else + asm ("movl %%esp, %0" : "=g" (sp)); +#endif + stack_exec = mprotect((void *)(sp & -0x1000L) - (MMAP_SZ - 0x1000), + MMAP_SZ, PROT_READ|PROT_WRITE|PROT_EXEC) == 0; + if ( !stack_exec ) + printf("Warning: Stack could not be made executable (%d).\n", errno); + printf("%-40s", "Testing addl %%ecx,(%%eax)..."); instr[0] = 0x01; instr[1] = 0x08; regs.eflags = 0x200; @@ -442,6 +529,106 @@ int main(int argc, char **argv) printf("skipped\n"); #endif + printf("%-40s", "Testing movq %mm3,(%ecx)..."); + if ( stack_exec && cpu_has_mmx ) + { + extern const unsigned char movq_to_mem[]; + + asm volatile ( "pcmpeqb %%mm3, %%mm3\n" + ".pushsection .test, \"a\", @progbits\n" + "movq_to_mem: movq %%mm3, (%0)\n" + ".popsection" :: "c" (NULL) ); + + memcpy(instr, movq_to_mem, 15); + memset(res, 0x33, 64); + memset(res + 8, 0xff, 8); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing movq (%edx),%mm5..."); + if ( stack_exec && cpu_has_mmx ) + { + extern const unsigned char movq_from_mem[]; + + asm volatile ( "pcmpgtb %%mm5, %%mm5\n" + ".pushsection .test, \"a\", @progbits\n" + "movq_from_mem: movq (%0), %%mm5\n" + ".popsection" :: "d" (NULL) ); + + memcpy(instr, movq_from_mem, 15); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0; + regs.edx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY ) + goto fail; + asm ( "pcmpeqb %%mm3, %%mm3\n\t" + "pcmpeqb %%mm5, %%mm3\n\t" + "pmovmskb %%mm3, %0" : "=r" (rc) ); + if ( rc != 0xff ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing movdqu %xmm2,(%ecx)..."); + if ( stack_exec && cpu_has_sse2 ) + { + extern const unsigned char movdqu_to_mem[]; + + asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n" + ".pushsection .test, \"a\", @progbits\n" + "movdqu_to_mem: movdqu %%xmm2, (%0)\n" + ".popsection" :: "c" (NULL) ); + + memcpy(instr, movdqu_to_mem, 15); + memset(res, 0x55, 64); + memset(res + 8, 0xff, 16); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + + printf("%-40s", "Testing movdqu (%edx),%xmm4..."); + if ( stack_exec && cpu_has_sse2 ) + { + extern const unsigned char movdqu_from_mem[]; + + asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n" + ".pushsection .test, \"a\", @progbits\n" + "movdqu_from_mem: movdqu (%0), %%xmm4\n" + ".popsection" :: "d" (NULL) ); + + memcpy(instr, movdqu_from_mem, 15); + regs.eip = (unsigned long)&instr[0]; + regs.ecx = 0; + regs.edx = (unsigned long)res; + rc = x86_emulate(&ctxt, &emulops); + if ( rc != X86EMUL_OKAY ) + goto fail; + asm ( "pcmpeqb %%xmm2, %%xmm2\n\t" + "pcmpeqb %%xmm4, %%xmm2\n\t" + "pmovmskb %%xmm2, %0" : "=r" (rc) ); + if ( rc != 0xffff ) + goto fail; + printf("okay\n"); + } + else + printf("skipped\n"); + for ( j = 1; j <= 2; j++ ) { #if defined(__i386__) diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c index 0d6967a21c..15f413c576 100644 --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -16,6 +16,7 @@ #include <xen/paging.h> #include <xen/trace.h> #include <asm/event.h> +#include <asm/xstate.h> #include <asm/hvm/emulate.h> #include <asm/hvm/hvm.h> #include <asm/hvm/trace.h> @@ -928,6 +929,20 @@ static int hvmemul_get_fpu( if ( !cpu_has_mmx ) return X86EMUL_UNHANDLEABLE; break; + case X86EMUL_FPU_xmm: + if ( !cpu_has_xmm || + (curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_EM) || + !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSFXSR) ) + return X86EMUL_UNHANDLEABLE; + break; + case X86EMUL_FPU_ymm: + if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) || + vm86_mode(ctxt->regs) || + !(curr->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) || + !(curr->arch.xcr0 & XSTATE_SSE) || + !(curr->arch.xcr0 & XSTATE_YMM) ) + return X86EMUL_UNHANDLEABLE; + break; default: return X86EMUL_UNHANDLEABLE; } diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c index db35429e96..3222b61e37 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.c +++ b/xen/arch/x86/x86_emulate/x86_emulate.c @@ -248,11 +248,52 @@ static uint8_t twobyte_table[256] = { /* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xF0 - 0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +#define REX_PREFIX 0x40 +#define REX_B 0x01 +#define REX_X 0x02 +#define REX_R 0x04 +#define REX_W 0x08 + +#define vex_none 0 + +enum vex_opcx { + vex_0f = vex_none + 1, + vex_0f38, + vex_0f3a, +}; + +enum vex_pfx { + vex_66 = vex_none + 1, + vex_f3, + vex_f2 +}; + +union vex { + uint8_t raw[2]; + struct { + uint8_t opcx:5; + uint8_t b:1; + uint8_t x:1; + uint8_t r:1; + uint8_t pfx:2; + uint8_t l:1; + uint8_t reg:4; + uint8_t w:1; + }; +}; + +#define copy_REX_VEX(ptr, rex, vex) do { \ + if ( (vex).opcx != vex_none ) \ + ptr[0] = 0xc4, ptr[1] = (vex).raw[0], ptr[2] = (vex).raw[1]; \ + else if ( mode_64bit() ) \ + ptr[1] = rex | REX_PREFIX; \ +} while (0) + /* Type, address-of, and value of an instruction's operand. */ struct operand { enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; @@ -281,6 +322,23 @@ struct operand { }; }; +typedef union { + uint64_t mmx; + uint64_t __attribute__ ((aligned(16))) xmm[2]; + uint64_t __attribute__ ((aligned(32))) ymm[4]; +} mmval_t; + +/* + * While proper alignment gets specified above, this doesn't get honored by + * the compiler for automatic variables. Use this helper to instantiate a + * suitably aligned variable, producing a pointer to access it. + */ +#define DECLARE_ALIGNED(type, var) \ + long __##var[sizeof(type) + __alignof(type) - __alignof(long)]; \ + type *const var##p = \ + (void *)((long)(__##var + __alignof(type) - __alignof(long)) \ + & -__alignof(type)) + /* MSRs. */ #define MSR_TSC 0x00000010 #define MSR_SYSENTER_CS 0x00000174 @@ -992,9 +1050,12 @@ static bool_t vcpu_has( #define vcpu_must_have(leaf, reg, bit) \ generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1) +#define vcpu_must_have_mmx() vcpu_must_have(0x00000001, EDX, 23) +#define vcpu_must_have_sse() vcpu_must_have(0x00000001, EDX, 25) #define vcpu_must_have_sse2() vcpu_must_have(0x00000001, EDX, 26) #define vcpu_must_have_sse3() vcpu_must_have(0x00000001, ECX, 0) #define vcpu_must_have_cx16() vcpu_must_have(0x00000001, ECX, 13) +#define vcpu_must_have_avx() vcpu_must_have(0x00000001, ECX, 28) static int in_longmode( @@ -1252,13 +1313,14 @@ x86_emulate( uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0; uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; + union vex vex = {}; unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes; #define REPE_PREFIX 1 #define REPNE_PREFIX 2 unsigned int lock_prefix = 0, rep_prefix = 0; int override_seg = -1, rc = X86EMUL_OKAY; struct operand src, dst; - + DECLARE_ALIGNED(mmval_t, mmval); /* * Data operand effective address (usually computed from ModRM). * Default is a memory operand relative to segment DS. @@ -1284,6 +1346,7 @@ x86_emulate( { case 0x66: /* operand-size override */ op_bytes = def_op_bytes ^ 6; + vex.pfx = vex_66; break; case 0x67: /* address-size override */ ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6); @@ -1311,9 +1374,11 @@ x86_emulate( break; case 0xf2: /* REPNE/REPNZ */ rep_prefix = REPNE_PREFIX; + vex.pfx = vex_f2; break; case 0xf3: /* REP/REPE/REPZ */ rep_prefix = REPE_PREFIX; + vex.pfx = vex_f3; break; case 0x40 ... 0x4f: /* REX */ if ( !mode_64bit() ) @@ -1357,6 +1422,70 @@ x86_emulate( { modrm = insn_fetch_type(uint8_t); modrm_mod = (modrm & 0xc0) >> 6; + + if ( !twobyte && ((b & ~1) == 0xc4) ) + switch ( def_ad_bytes ) + { + default: + BUG(); + case 2: + if ( in_realmode(ctxt, ops) || (_regs.eflags & EFLG_VM) ) + break; + /* fall through */ + case 4: + if ( modrm_mod != 3 ) + break; + /* fall through */ + case 8: + /* VEX */ + generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); + + vex.raw[0] = b; + if ( b & 1 ) + { + vex.raw[1] = b; + vex.opcx = vex_0f; + vex.x = 1; + vex.b = 1; + vex.w = 0; + } + else + { + vex.raw[1] = insn_fetch_type(uint8_t); + if ( mode_64bit() ) + { + if ( !vex.b ) + rex_prefix |= REX_B; + if ( !vex.x ) + rex_prefix |= REX_X; + if ( vex.w ) + { + rex_prefix |= REX_W; + op_bytes = 8; + } + } + } + vex.reg ^= 0xf; + if ( !mode_64bit() ) + vex.reg &= 0x7; + else if ( !vex.r ) + rex_prefix |= REX_R; + + fail_if(vex.opcx != vex_0f); + twobyte = 1; + b = insn_fetch_type(uint8_t); + d = twobyte_table[b]; + + /* Unrecognised? */ + if ( d == 0 ) + goto cannot_emulate; + + modrm = insn_fetch_type(uint8_t); + modrm_mod = (modrm & 0xc0) >> 6; + + break; + } + modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3); modrm_rm = modrm & 0x07; @@ -3914,44 +4043,78 @@ x86_emulate( break; } - case 0x6f: /* movq mm/m64,mm */ { - uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 }; + case 0xe7: /* movntq mm,m64 */ + /* {,v}movntdq xmm,m128 */ + /* vmovntdq ymm,m256 */ + fail_if(ea.type != OP_MEM); + fail_if(vex.pfx == vex_f3); + /* fall through */ + case 0x6f: /* movq mm/m64,mm */ + /* {,v}movdq{a,u} xmm/m128,xmm */ + /* vmovdq{a,u} ymm/m256,ymm */ + case 0x7f: /* movq mm,mm/m64 */ + /* {,v}movdq{a,u} xmm,xmm/m128 */ + /* vmovdq{a,u} ymm,ymm/m256 */ + { + uint8_t stub[] = { 0x3e, 0x3e, 0x0f, b, modrm, 0xc3 }; struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 }; - uint64_t val; - if ( ea.type == OP_MEM ) + + if ( vex.opcx == vex_none ) { - unsigned long lval, hval; - if ( (rc = read_ulong(ea.mem.seg, ea.mem.off+0, - &lval, 4, ctxt, ops)) || - (rc = read_ulong(ea.mem.seg, ea.mem.off+4, - &hval, 4, ctxt, ops)) ) - goto done; - val = ((uint64_t)hval << 32) | (uint32_t)lval; - stub[2] = modrm & 0x38; /* movq (%eax),%mmN */ + switch ( vex.pfx ) + { + case vex_66: + case vex_f3: + vcpu_must_have_sse2(); + stub[0] = 0x66; /* movdqa */ + get_fpu(X86EMUL_FPU_xmm, &fic); + ea.bytes = 16; + break; + case vex_none: + if ( b != 0xe7 ) + vcpu_must_have_mmx(); + else + vcpu_must_have_sse(); + get_fpu(X86EMUL_FPU_mmx, &fic); + ea.bytes = 8; + break; + default: + goto cannot_emulate; + } + } + else + { + fail_if((vex.opcx != vex_0f) || vex.reg || + ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); + vcpu_must_have_avx(); + get_fpu(X86EMUL_FPU_ymm, &fic); + ea.bytes = 16 << vex.l; } - get_fpu(X86EMUL_FPU_mmx, &fic); - asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" ); - put_fpu(&fic); - break; - } - - case 0x7f: /* movq mm,mm/m64 */ { - uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 }; - struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 }; - uint64_t val; - if ( ea.type == OP_MEM ) - stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */ - get_fpu(X86EMUL_FPU_mmx, &fic); - asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" ); - put_fpu(&fic); if ( ea.type == OP_MEM ) { - unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32); - if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt)) || - (rc = ops->write(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt)) ) - goto done; + /* XXX enable once there is ops->ea() or equivalent + generate_exception_if((vex.pfx == vex_66) && + (ops->ea(ea.mem.seg, ea.mem.off) + & (ea.bytes - 1)), EXC_GP, 0); */ + if ( b == 0x6f ) + rc = ops->read(ea.mem.seg, ea.mem.off+0, mmvalp, + ea.bytes, ctxt); + /* convert memory operand to (%rAX) */ + rex_prefix &= ~REX_B; + vex.b = 1; + stub[4] &= 0x38; } - break; + if ( !rc ) + { + copy_REX_VEX(stub, rex_prefix, vex); + asm volatile ( "call *%0" : : "r" (stub), "a" (mmvalp) + : "memory" ); + } + put_fpu(&fic); + if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) ) + rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, + ea.bytes, ctxt); + goto done; } case 0x80 ... 0x8f: /* jcc (near) */ { diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h index 286279f10a..85bc4bcf76 100644 --- a/xen/arch/x86/x86_emulate/x86_emulate.h +++ b/xen/arch/x86/x86_emulate/x86_emulate.h @@ -99,7 +99,9 @@ struct segment_register { /* FPU sub-types which may be requested via ->get_fpu(). */ enum x86_emulate_fpu_type { X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */ - X86EMUL_FPU_mmx /* MMX instruction set (%mm0-%mm7) */ + X86EMUL_FPU_mmx, /* MMX instruction set (%mm0-%mm7) */ + X86EMUL_FPU_xmm, /* SSE instruction set (%xmm0-%xmm7/15) */ + X86EMUL_FPU_ymm /* AVX/XOP instruction set (%ymm0-%ymm7/15) */ }; /* diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h index f34ca79b9c..a1b52edd2d 100644 --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -218,7 +218,7 @@ #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) - +#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX) #define cpu_has_lwp boot_cpu_has(X86_FEATURE_LWP) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) |