author     Keir Fraser <keir@xen.org>  2010-11-03 08:15:20 +0000
committer  Keir Fraser <keir@xen.org>  2010-11-03 08:15:20 +0000
commit     2d741abb30af1a2abc45f1cba4c8e78e6d1b07d0 (patch)
tree       b1f3b5413d754a15302417d98c4b20167aad961b
parent     3b0246ea0347bacdfcb751a920899837f29e13d8 (diff)
x86: Xsave support for PV guests.
Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Han Weidong <weidong.han@intel.com>
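For orientation: with this change a PV guest kernel enables XSAVE much as it
would on bare metal. It sets CR4.OSXSAVE (which pv_guest_cr4_fixup() now
leaves intact) and then executes XSETBV on XCR 0, which Xen traps and
emulates through the new path in emulate_privileged_op(). Below is a minimal
sketch of that guest-side sequence; the helpers and constants are assumptions
local to the example, not Xen or guest-kernel APIs.

#include <stdint.h>

#define X86_CR4_OSXSAVE (1UL << 18)
#define XSTATE_FP       (1ULL << 0)   /* x87 state: must stay set in XCR0 */
#define XSTATE_SSE      (1ULL << 1)   /* SSE state */

static inline unsigned long read_cr4(void)
{
    unsigned long cr4;
    asm volatile ( "mov %%cr4, %0" : "=r" (cr4) );
    return cr4;
}

static inline void write_cr4(unsigned long cr4)
{
    /* Privileged; for a PV guest this traps to Xen and is emulated. */
    asm volatile ( "mov %0, %%cr4" :: "r" (cr4) : "memory" );
}

static inline void xsetbv(uint32_t xcr, uint64_t val)
{
    /* 0f 01 d1 = XSETBV, also trapped and emulated by Xen for PV guests.
     * Per the traps.c changes below, Xen raises #UD if CR4.OSXSAVE is clear
     * or a lock/rep/operand-size prefix is present, and fails the
     * instruction if bit 0 of the new XCR0 is clear or any bit outside the
     * host's xfeature_mask is requested. */
    asm volatile ( ".byte 0x0f,0x01,0xd1"
                   :: "c" (xcr),
                      "a" ((uint32_t)val),
                      "d" ((uint32_t)(val >> 32)) );
}

static void guest_enable_xsave(void)
{
    write_cr4(read_cr4() | X86_CR4_OSXSAVE);  /* step 1: CR4.OSXSAVE */
    xsetbv(0, XSTATE_FP | XSTATE_SSE);        /* step 2: program XCR0 */
}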
-rw-r--r--  tools/libxc/xc_cpuid_x86.c     |  1
-rw-r--r--  xen/arch/x86/domain.c          | 30
-rw-r--r--  xen/arch/x86/hvm/hvm.c         | 13
-rw-r--r--  xen/arch/x86/hvm/vmx/vmx.c     |  8
-rw-r--r--  xen/arch/x86/i387.c            | 14
-rw-r--r--  xen/arch/x86/traps.c           | 51
-rw-r--r--  xen/include/asm-x86/domain.h   | 20
-rw-r--r--  xen/include/asm-x86/hvm/vcpu.h |  9
-rw-r--r--  xen/include/asm-x86/i387.h     | 22
9 files changed, 121 insertions, 47 deletions
diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
index e517eb7733..68e99a1738 100644
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -323,7 +323,6 @@ static void xc_cpuid_pv_policy(
clear_bit(X86_FEATURE_XTPR, regs[2]);
clear_bit(X86_FEATURE_PDCM, regs[2]);
clear_bit(X86_FEATURE_DCA, regs[2]);
- clear_bit(X86_FEATURE_XSAVE, regs[2]);
set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
break;
case 0x80000001:
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index b67ddd6a00..7356efff3f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -343,10 +343,26 @@ int vcpu_initialise(struct vcpu *v)
paging_vcpu_init(v);
+ if ( cpu_has_xsave )
+ {
+ /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
+ void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
+ if ( xsave_area == NULL )
+ return -ENOMEM;
+
+ xsave_init_save_area(xsave_area);
+ v->arch.xsave_area = xsave_area;
+ v->arch.xcr0 = XSTATE_FP_SSE;
+ v->arch.xcr0_accum = XSTATE_FP_SSE;
+ }
+
if ( is_hvm_domain(d) )
{
if ( (rc = hvm_vcpu_initialise(v)) != 0 )
+ {
+ xfree(v->arch.xsave_area);
return rc;
+ }
}
else
{
@@ -376,7 +392,13 @@ int vcpu_initialise(struct vcpu *v)
spin_lock_init(&v->arch.shadow_ldt_lock);
- return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
+ rc = 0;
+ if ( is_pv_32on64_vcpu(v) )
+ rc = setup_compat_l4(v);
+ if ( rc )
+ xfree(v->arch.xsave_area);
+
+ return rc;
}
void vcpu_destroy(struct vcpu *v)
@@ -384,6 +406,8 @@ void vcpu_destroy(struct vcpu *v)
if ( is_pv_32on64_vcpu(v) )
release_compat_l4(v);
+ xfree(v->arch.xsave_area);
+
if ( is_hvm_vcpu(v) )
hvm_vcpu_destroy(v);
}
@@ -592,6 +616,8 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *v, unsigned long guest_cr4)
hv_cr4_mask &= ~X86_CR4_DE;
if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) )
hv_cr4_mask &= ~X86_CR4_FSGSBASE;
+ if ( cpu_has_xsave )
+ hv_cr4_mask &= ~X86_CR4_OSXSAVE;
if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
gdprintk(XENLOG_WARNING,
@@ -1367,6 +1393,8 @@ static void __context_switch(void)
memcpy(stack_regs,
&n->arch.guest_context.user_regs,
CTXT_SWITCH_STACK_BYTES);
+ if ( cpu_has_xsave && n->arch.xcr0 != get_xcr0() )
+ set_xcr0(n->arch.xcr0);
n->arch.ctxt_switch_to(n);
}
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 45afb530d6..43d907a2ee 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -805,18 +805,6 @@ int hvm_vcpu_initialise(struct vcpu *v)
hvm_asid_flush_vcpu(v);
- if ( cpu_has_xsave )
- {
- /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. */
- void *xsave_area = _xmalloc(xsave_cntxt_size, 64);
- if ( xsave_area == NULL )
- return -ENOMEM;
-
- xsave_init_save_area(xsave_area);
- v->arch.hvm_vcpu.xsave_area = xsave_area;
- v->arch.hvm_vcpu.xcr0 = XSTATE_FP_SSE;
- }
-
if ( (rc = vlapic_init(v)) != 0 )
goto fail1;
@@ -879,7 +867,6 @@ void hvm_vcpu_destroy(struct vcpu *v)
hvm_vcpu_cacheattr_destroy(v);
vlapic_destroy(v);
hvm_funcs.vcpu_destroy(v);
- xfree(v->arch.hvm_vcpu.xsave_area);
/* Event channel is already freed by evtchn_destroy(). */
/*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 24c2331f47..e24f0093e4 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -652,10 +652,7 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
struct domain *d = v->domain;
unsigned long old_cr4 = read_cr4(), new_cr4 = mmu_cr4_features;
- /* HOST_CR4 in VMCS is always mmu_cr4_features and
- * CR4_OSXSAVE(if supported). Sync CR4 now. */
- if ( cpu_has_xsave )
- new_cr4 |= X86_CR4_OSXSAVE;
+ /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
if ( old_cr4 != new_cr4 )
write_cr4(new_cr4);
@@ -2215,7 +2212,8 @@ static int vmx_handle_xsetbv(u64 new_bv)
if ( (xfeature_mask & XSTATE_YMM & new_bv) && !(new_bv & XSTATE_SSE) )
goto err;
- v->arch.hvm_vcpu.xcr0 = new_bv;
+ v->arch.xcr0 = new_bv;
+ v->arch.xcr0_accum |= new_bv;
set_xcr0(new_bv);
return 0;
err:
diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
index fa16fa9c5c..27b49234ed 100644
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -33,9 +33,14 @@ void save_init_fpu(struct vcpu *v)
if ( cr0 & X86_CR0_TS )
clts();
- if ( cpu_has_xsave && is_hvm_vcpu(v) )
+ if ( cpu_has_xsave )
{
+ /* XCR0 normally holds whatever the guest OS has set. Before Xen itself
+ * does a save/restore, switch to the accumulated feature mask so every
+ * state the guest has ever enabled is covered. */
+ set_xcr0(v->arch.xcr0_accum);
xsave(v);
+ set_xcr0(v->arch.xcr0);
}
else if ( cpu_has_fxsr )
{
@@ -144,6 +149,9 @@ u32 xsave_cntxt_size;
/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
u64 xfeature_mask;
+/* Cached xcr0 for fast read */
+DEFINE_PER_CPU(uint64_t, xcr0);
+
void xsave_init(void)
{
u32 eax, ebx, ecx, edx;
@@ -171,13 +179,11 @@ void xsave_init(void)
BUG_ON(ecx < min_size);
/*
- * We will only enable the features we know for hvm guest. Here we use
- * set/clear CR4_OSXSAVE and re-run cpuid to get xsave_cntxt_size.
+ * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
*/
set_in_cr4(X86_CR4_OSXSAVE);
set_xcr0(eax & XCNTXT_MASK);
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- clear_in_cr4(X86_CR4_OSXSAVE);
if ( cpu == 0 )
{
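As a reading aid for the save_init_fpu() change above and the setup_fpu()
change further below: Xen temporarily widens XCR0 to xcr0_accum so that
XSAVE/XRSTOR covers every state component the guest has ever enabled, then
restores the guest's own XCR0. A condensed sketch of that discipline follows;
the declarations are placeholders standing in for the real Xen definitions.

#include <stdint.h>

/* Placeholders for the real Xen definitions used in this patch. */
struct vcpu_xstate {
    uint64_t xcr0;        /* XCR0 as the guest last programmed it     */
    uint64_t xcr0_accum;  /* every bit the guest has ever set in XCR0 */
};

extern void set_xcr0(uint64_t mask);   /* wraps XSETBV, caches per-CPU */
extern void xsave_state(void);         /* stands in for xsave(v)       */
extern void xrstor_state(void);        /* stands in for xrstor(v)      */

/*
 * XSAVE/XRSTOR only touch the state components currently enabled in XCR0.
 * If the guest enabled YMM, used AVX, and then temporarily cleared the YMM
 * bit, saving under the guest's current XCR0 would drop the YMM registers
 * across a context switch.  Saving and restoring under the accumulated mask
 * preserves them, and the guest-visible XCR0 is put back afterwards.
 */
static void save_guest_xstate(struct vcpu_xstate *x)
{
    set_xcr0(x->xcr0_accum);   /* widen to everything ever enabled */
    xsave_state();
    set_xcr0(x->xcr0);         /* back to the guest's own XCR0     */
}

static void restore_guest_xstate(struct vcpu_xstate *x)
{
    set_xcr0(x->xcr0_accum);
    xrstor_state();
    set_xcr0(x->xcr0);
}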
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index fa3f48ebb8..f91e3c8ae3 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -795,7 +795,6 @@ static void pv_cpuid(struct cpu_user_regs *regs)
__clear_bit(X86_FEATURE_XTPR % 32, &c);
__clear_bit(X86_FEATURE_PDCM % 32, &c);
__clear_bit(X86_FEATURE_DCA % 32, &c);
- __clear_bit(X86_FEATURE_XSAVE % 32, &c);
if ( !cpu_has_apic )
__clear_bit(X86_FEATURE_X2APIC % 32, &c);
__set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
@@ -1715,7 +1714,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
int rc;
unsigned int port, i, data_sel, ar, data, bpmatch = 0;
- unsigned int op_bytes, op_default, ad_bytes, ad_default;
+ unsigned int op_bytes, op_default, ad_bytes, ad_default, opsize_prefix = 0;
#define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
? regs->reg \
: ad_bytes == 4 \
@@ -1751,6 +1750,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) )
{
case 0x66: /* operand-size override */
+ opsize_prefix = 1;
op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
continue;
case 0x67: /* address-size override */
@@ -2051,13 +2051,48 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
goto fail;
switch ( opcode )
{
- case 0x1: /* RDTSCP */
- if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
- !guest_kernel_mode(v, regs) )
- goto fail;
- if ( insn_fetch(u8, code_base, eip, code_limit) != 0xf9 )
+ case 0x1: /* RDTSCP and XSETBV */
+ switch ( insn_fetch(u8, code_base, eip, code_limit) )
+ {
+ case 0xf9: /* RDTSCP */
+ if ( (v->arch.guest_context.ctrlreg[4] & X86_CR4_TSD) &&
+ !guest_kernel_mode(v, regs) )
+ goto fail;
+ pv_soft_rdtsc(v, regs, 1);
+ break;
+ case 0xd1: /* XSETBV */
+ {
+ u64 new_xfeature = (u32)regs->eax | ((u64)regs->edx << 32);
+
+ if ( lock || rep_prefix || opsize_prefix
+ || !(v->arch.guest_context.ctrlreg[4] & X86_CR4_OSXSAVE) )
+ {
+ do_guest_trap(TRAP_invalid_op, regs, 0);
+ goto skip;
+ }
+
+ if ( !guest_kernel_mode(v, regs) )
+ goto fail;
+
+ switch ( (u32)regs->ecx )
+ {
+ case XCR_XFEATURE_ENABLED_MASK:
+ /* Bit 0 of XCR0 must be set and no reserved bits may be set. */
+ if ( !(new_xfeature & XSTATE_FP) || (new_xfeature & ~xfeature_mask) )
+ goto fail;
+
+ v->arch.xcr0 = new_xfeature;
+ v->arch.xcr0_accum |= new_xfeature;
+ set_xcr0(new_xfeature);
+ break;
+ default:
+ goto fail;
+ }
+ break;
+ }
+ default:
goto fail;
- pv_soft_rdtsc(v, regs, 1);
+ }
break;
case 0x06: /* CLTS */
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 7549126504..15b050ac0e 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -400,6 +400,23 @@ struct arch_vcpu
pagetable_t monitor_table; /* (MFN) hypervisor PT (for HVM) */
unsigned long cr3; /* (MA) value to install in HW CR3 */
+ /*
+ * The save area for Processor Extended States and the bitmask of the
+ * XSAVE/XRSTOR features. They are used by: 1) when a vcpu (which has
+ * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in
+ * #NM handler, we XRSTOR the states we XSAVE-ed;
+ */
+ void *xsave_area;
+ uint64_t xcr0;
+ /* Accumulated mask of the eXtended features Xen itself uses with
+ * XSAVE/XRSTOR, since we can never know whether the guest OS relies on
+ * the content being preserved when it clears a feature flag (for
+ * example, only temporarily).
+ * The guest still cannot touch an eXtended state before it explicitly
+ * enables that state via xcr0.
+ */
+ uint64_t xcr0_accum;
+
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
spinlock_t shadow_ldt_lock;
@@ -435,7 +452,8 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
#define pv_guest_cr4_to_real_cr4(v) \
(((v)->arch.guest_context.ctrlreg[4] \
| (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE)) \
- | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)) \
+ | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \
+ | ((cpu_has_xsave)? X86_CR4_OSXSAVE : 0)) \
& ~X86_CR4_DE)
#define real_cr4_to_pv_guest_cr4(c) \
((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 53ef98320f..1d72ecfc60 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -49,15 +49,6 @@ struct hvm_vcpu {
*/
unsigned long hw_cr[5];
- /*
- * The save area for Processor Extended States and the bitmask of the
- * XSAVE/XRSTOR features. They are used by: 1) when a vcpu (which has
- * dirtied FPU/SSE) is scheduled out we XSAVE the states here; 2) in
- * #NM handler, we XRSTOR the states we XSAVE-ed;
- */
- void *xsave_area;
- uint64_t xcr0;
-
struct vlapic vlapic;
s64 cache_tsc_offset;
u64 guest_time;
diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h
index ba365c0b3c..ba773e99ed 100644
--- a/xen/include/asm-x86/i387.h
+++ b/xen/include/asm-x86/i387.h
@@ -49,6 +49,8 @@ struct xsave_struct
#define REX_PREFIX
#endif
+DECLARE_PER_CPU(uint64_t, xcr0);
+
static inline void xsetbv(u32 index, u64 xfeatures)
{
u32 hi = xfeatures >> 32;
@@ -60,14 +62,20 @@ static inline void xsetbv(u32 index, u64 xfeatures)
static inline void set_xcr0(u64 xfeatures)
{
+ this_cpu(xcr0) = xfeatures;
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures);
}
+static inline uint64_t get_xcr0(void)
+{
+ return this_cpu(xcr0);
+}
+
static inline void xsave(struct vcpu *v)
{
struct xsave_struct *ptr;
- ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+ ptr =(struct xsave_struct *)v->arch.xsave_area;
asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x27"
:
@@ -79,7 +87,7 @@ static inline void xrstor(struct vcpu *v)
{
struct xsave_struct *ptr;
- ptr =(struct xsave_struct *)v->arch.hvm_vcpu.xsave_area;
+ ptr =(struct xsave_struct *)v->arch.xsave_area;
asm volatile (".byte " REX_PREFIX "0x0f,0xae,0x2f"
:
@@ -108,14 +116,18 @@ static inline void setup_fpu(struct vcpu *v)
if ( !v->fpu_dirtied )
{
v->fpu_dirtied = 1;
- if ( cpu_has_xsave && is_hvm_vcpu(v) )
+ if ( cpu_has_xsave )
{
if ( !v->fpu_initialised )
v->fpu_initialised = 1;
- set_xcr0(v->arch.hvm_vcpu.xcr0 | XSTATE_FP_SSE);
+ /* XCR0 normally holds whatever the guest OS has set. Before Xen itself
+ * does a save/restore, switch to the accumulated feature mask so every
+ * state the guest has ever enabled is covered.
+ */
+ set_xcr0(v->arch.xcr0_accum);
xrstor(v);
- set_xcr0(v->arch.hvm_vcpu.xcr0);
+ set_xcr0(v->arch.xcr0);
}
else
{