x86/xsave: fix migration from xsave-capable to xsave-incapable host

With CPUID features suitably masked this is supposed to work, but was completely broken (i.e. the case wasn't even considered when the original xsave save/restore code was written).

First of all, xsave_enabled() wrongly returned the value of cpu_has_xsave, i.e. not even taking into consideration attributes of the vCPU in question. Instead this function ought to check whether the guest ever enabled xsave support (by writing a [non-zero] value to XCR0). As a result of this, a vCPU's xcr0 and xcr0_accum must no longer be initialized to XSTATE_FP_SSE (since that's a valid value a guest could write to XCR0), and the xsave/xrstor as well as the context switch code need to suitably account for this (by always enforcing at least this part of the state to be saved/loaded).

This involves undoing large parts of c/s 22945:13a7d1f7f62c ("x86: add strictly sanity check for XSAVE/XRSTOR") - we need to cleanly distinguish between hardware capabilities and vCPU used features.

Next both HVM and PV save code needed tweaking to not always save the full state supported by the underlying hardware, but just the parts that the guest actually used. Similarly the restore code should bail not just on state being restored that the hardware cannot handle, but also on inconsistent save state (inconsistent XCR0 settings or size of saved state not in line with XCR0).

And finally the PV extended context get/set code needs to use slightly different logic than the HVM one, as here we can't just key off of xsave_enabled() (i.e. avoid doing anything if a guest doesn't use xsave) because the tools use this function to determine host capabilities as well as read/write vCPU state. The set operation in particular needs to be capable of cleanly dealing with input that consists of only the xcr0 and xcr0_accum values (if they're both zero then no further data is required).

While for things to work correctly both sides (saving _and_ restoring host) need to run with the fixed code, as far as I can tell no breakage should occur if either side isn't up to date (other than the breakage that this patch attempts to fix).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Yang Zhang <yang.z.zhang@intel.com>
Acked-by: Keir Fraser <keir@xen.org>
author: Jan Beulich <jbeulich@suse.com> 2013-09-09 14:36:54 +0200
committer: Jan Beulich <jbeulich@suse.com> 2013-09-09 14:36:54 +0200
commit: 4cc1344447a0458df5d222960f2adf1b65084fa8 (patch)
tree: de70a143c62b2544ca95636671554a99b6a4f95d /xen/arch/x86/xstate.c
parent: 6ad580d8685190585ba27e996be7e1a730d2a317 (diff)
download: xen-4cc1344447a0458df5d222960f2adf1b65084fa8.tar.gz
xen-4cc1344447a0458df5d222960f2adf1b65084fa8.tar.bz2
xen-4cc1344447a0458df5d222960f2adf1b65084fa8.zip
1 files changed, 51 insertions, 12 deletions
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index 7e459d0256..c6b8dcc993 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -21,7 +21,7 @@ bool_t __read_mostly cpu_has_xsaveopt;
  * the supported and enabled features on the processor, including the
  * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known.
  */
-u32 xsave_cntxt_size;
+static u32 __read_mostly xsave_cntxt_size;
 
 /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */
 u64 xfeature_mask;
@@ -206,13 +206,13 @@ void xrstor(struct vcpu *v, uint64_t mask)
 
 bool_t xsave_enabled(const struct vcpu *v)
 {
-    if ( cpu_has_xsave )
-    {
-        ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
-        ASSERT(v->arch.xsave_area);
-    }
+    if ( !cpu_has_xsave )
+        return 0;
 
-    return cpu_has_xsave;	
+    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
+    ASSERT(v->arch.xsave_area);
+
+    return !!v->arch.xcr0_accum;
 }
 
 int xstate_alloc_save_area(struct vcpu *v)
@@ -238,8 +238,8 @@ int xstate_alloc_save_area(struct vcpu *v)
     save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;
 
     v->arch.xsave_area = save_area;
-    v->arch.xcr0 = XSTATE_FP_SSE;
-    v->arch.xcr0_accum = XSTATE_FP_SSE;
+    v->arch.xcr0 = 0;
+    v->arch.xcr0_accum = 0;
 
     return 0;
 }
@@ -257,7 +257,11 @@ void xstate_init(bool_t bsp)
     u64 feature_mask;
 
     if ( boot_cpu_data.cpuid_level < XSTATE_CPUID )
+    {
+        BUG_ON(!bsp);
+        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
         return;
+    }
 
     cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 
@@ -277,7 +281,6 @@ void xstate_init(bool_t bsp)
     set_in_cr4(X86_CR4_OSXSAVE);
     if ( !set_xcr0(feature_mask) )
         BUG();
-    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
 
     if ( bsp )
     {
@@ -286,14 +289,14 @@ void xstate_init(bool_t bsp)
          * xsave_cntxt_size is the max size required by enabled features.
          * We know FP/SSE and YMM about eax, and nothing about edx at present.
          */
-        xsave_cntxt_size = ebx;
+        xsave_cntxt_size = xstate_ctxt_size(feature_mask);
         printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n",
             __func__, xsave_cntxt_size, xfeature_mask);
     }
     else
     {
         BUG_ON(xfeature_mask != feature_mask);
-        BUG_ON(xsave_cntxt_size != ebx);
+        BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask));
     }
 
     /* Check XSAVEOPT feature. */
@@ -304,6 +307,42 @@ void xstate_init(bool_t bsp)
         BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT));
 }
 
+unsigned int xstate_ctxt_size(u64 xcr0)
+{
+    u32 ebx = 0;
+
+    if ( xcr0 )
+    {
+        u64 act_xcr0 = get_xcr0();
+        u32 eax, ecx, edx;
+        bool_t ok = set_xcr0(xcr0);
+
+        ASSERT(ok);
+        cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+        ASSERT(ebx <= ecx);
+        ok = set_xcr0(act_xcr0);
+        ASSERT(ok);
+    }
+
+    return ebx;
+}
+
+int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, u64 xfeat_mask)
+{
+    if ( (xcr0_accum & ~xfeat_mask) ||
+         (xstate_bv & ~xcr0_accum) ||
+         (xcr0 & ~xcr0_accum) ||
+         !(xcr0 & XSTATE_FP) ||
+         ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) ||
+         ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) )
+        return -EINVAL;
+
+    if ( xcr0_accum & ~xfeature_mask )
+        return -EOPNOTSUPP;
+
+    return 0;
+}
+
 int handle_xsetbv(u32 index, u64 new_bv)
 {
     struct vcpu *curr = current;
author	Jan Beulich <jbeulich@suse.com>	2013-09-09 14:36:54 +0200
committer	Jan Beulich <jbeulich@suse.com>	2013-09-09 14:36:54 +0200
commit	4cc1344447a0458df5d222960f2adf1b65084fa8 (patch)
tree	de70a143c62b2544ca95636671554a99b6a4f95d /xen/arch/x86/xstate.c
parent	6ad580d8685190585ba27e996be7e1a730d2a317 (diff)
download	xen-4cc1344447a0458df5d222960f2adf1b65084fa8.tar.gz xen-4cc1344447a0458df5d222960f2adf1b65084fa8.tar.bz2 xen-4cc1344447a0458df5d222960f2adf1b65084fa8.zip