aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkfraser@localhost.localdomain <kfraser@localhost.localdomain>2007-08-08 17:28:13 +0100
committerkfraser@localhost.localdomain <kfraser@localhost.localdomain>2007-08-08 17:28:13 +0100
commitba2a198fc70dc65dcb7f3a54cd82084da0e2dfa6 (patch)
tree9c7cc1df5a34df549ff061f50d124dad74af4a00
parentd2b8c776792ecbb0384effc0f99204d41726eebb (diff)
downloadxen-ba2a198fc70dc65dcb7f3a54cd82084da0e2dfa6.tar.gz
xen-ba2a198fc70dc65dcb7f3a54cd82084da0e2dfa6.tar.bz2
xen-ba2a198fc70dc65dcb7f3a54cd82084da0e2dfa6.zip
hvm: Handle hw_cr[] array a bit more sanely.
SVM for the most part does not need to use it at all, and this makes the code clearer. Signed-off-by: Keir Fraser <keir@xensource.com>
-rw-r--r--xen/arch/x86/hvm/hvm.c7
-rw-r--r--xen/arch/x86/hvm/svm/svm.c86
-rw-r--r--xen/arch/x86/hvm/svm/vmcb.c17
-rw-r--r--xen/arch/x86/hvm/vmx/vmcs.c78
-rw-r--r--xen/arch/x86/hvm/vmx/vmx.c9
-rw-r--r--xen/include/asm-x86/hvm/vcpu.h7
6 files changed, 98 insertions, 106 deletions
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index b72420e8ad..b8a249de59 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -596,9 +596,6 @@ int hvm_set_cr0(unsigned long value)
}
v->arch.hvm_vcpu.guest_cr[0] = value;
- v->arch.hvm_vcpu.hw_cr[0] = value;
- if ( !paging_mode_hap(v->domain) )
- v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_PG | X86_CR0_WP;
hvm_update_guest_cr(v, 0);
if ( (value ^ old_value) & X86_CR0_PG )
@@ -672,10 +669,6 @@ int hvm_set_cr4(unsigned long value)
old_cr = v->arch.hvm_vcpu.guest_cr[4];
v->arch.hvm_vcpu.guest_cr[4] = value;
- v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
- if ( paging_mode_hap(v->domain) )
- v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
- v->arch.hvm_vcpu.hw_cr[4] |= value;
hvm_update_guest_cr(v, 4);
/* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 22bef7c551..5a23e7cbfb 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -59,8 +59,9 @@ int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
asmlinkage void do_IRQ(struct cpu_user_regs *);
-static int svm_reset_to_realmode(struct vcpu *v,
- struct cpu_user_regs *regs);
+static int svm_reset_to_realmode(
+ struct vcpu *v, struct cpu_user_regs *regs);
+static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;
@@ -343,48 +344,26 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
vmcb->rsp = c->rsp;
vmcb->rflags = c->rflags;
- v->arch.hvm_vcpu.guest_cr[0] = c->cr0;
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] =
- c->cr0 | X86_CR0_WP | X86_CR0_ET | X86_CR0_PG;
+ v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
+ svm_update_guest_cr(v, 0);
v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
+ svm_update_guest_cr(v, 2);
+ v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
+ svm_update_guest_cr(v, 4);
+
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__,
- c->cr3,
- c->cr0,
- c->cr4);
+ __func__, c->cr3, c->cr0, c->cr4);
#endif
- if ( !hvm_paging_enabled(v) )
- {
- printk("%s: paging not enabled.\n", __func__);
- goto skip_cr3;
- }
-
- if ( c->cr3 == v->arch.hvm_vcpu.guest_cr[3] )
+ if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) )
{
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
- goto bad_cr3;
- }
- else
- {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
goto bad_cr3;
-
old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
v->arch.guest_table = pagetable_from_pfn(mfn);
if (old_base_mfn)
@@ -392,10 +371,6 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
}
- skip_cr3:
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = c->cr4 | HVM_CR4_HOST_MASK;
- v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-
vmcb->idtr.limit = c->idtr_limit;
vmcb->idtr.base = c->idtr_base;
@@ -449,10 +424,6 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0];
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
- vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3] = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
@@ -586,17 +557,22 @@ static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
switch ( cr )
{
case 0:
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0];
+ vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ if ( !paging_mode_hap(v->domain) )
+ vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
break;
case 2:
- vmcb->cr2 = v->arch.hvm_vcpu.hw_cr[2];
+ vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
break;
case 3:
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
svm_asid_inv_asid(v);
break;
case 4:
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4];
+ vmcb->cr4 = HVM_CR4_HOST_MASK;
+ if ( paging_mode_hap(v->domain) )
+ vmcb->cr4 &= ~X86_CR4_PAE;
+ vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
break;
default:
BUG();
@@ -724,7 +700,7 @@ static void svm_stts(struct vcpu *v)
if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS;
+ vmcb->cr0 |= X86_CR0_TS;
}
}
@@ -1045,7 +1021,7 @@ static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS;
+ vmcb->cr0 &= ~X86_CR0_TS;
}
/* Reserved bits ECX: [31:14], [12:4], [2:1]*/
@@ -1774,7 +1750,7 @@ static void svm_cr_access(
/* TS being cleared means that it's time to restore fpu state. */
setup_fpu(current);
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; /* clear TS */
+ vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
break;
@@ -2085,22 +2061,16 @@ static int svm_reset_to_realmode(struct vcpu *v,
memset(regs, 0, sizeof(struct cpu_user_regs));
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] =
- X86_CR0_ET | X86_CR0_PG | X86_CR0_WP;
v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
+ svm_update_guest_cr(v, 0);
- vmcb->cr2 = 0;
- vmcb->efer = EFER_SVME;
+ v->arch.hvm_vcpu.guest_cr[2] = 0;
+ svm_update_guest_cr(v, 2);
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
v->arch.hvm_vcpu.guest_cr[4] = 0;
+ svm_update_guest_cr(v, 4);
- if ( paging_mode_hap(v->domain) )
- {
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0];
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
- }
+ vmcb->efer = EFER_SVME;
/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
@@ -2231,7 +2201,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
unsigned long va;
va = vmcb->exitinfo2;
regs->error_code = vmcb->exitinfo1;
- HVM_DBG_LOG(DBG_LEVEL_VMMU,
+ HVM_DBG_LOG(DBG_LEVEL_VMMU,
"eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
(unsigned long)regs->eax, (unsigned long)regs->ebx,
(unsigned long)regs->ecx, (unsigned long)regs->edx,
diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
index f532ae6ae1..e704747393 100644
--- a/xen/arch/x86/hvm/svm/vmcb.c
+++ b/xen/arch/x86/hvm/svm/vmcb.c
@@ -216,28 +216,19 @@ static int construct_vmcb(struct vcpu *v)
vmcb->tr.base = 0;
vmcb->tr.limit = 0xff;
- /* Guest CR0. */
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = read_cr0();
- v->arch.hvm_vcpu.guest_cr[0] =
- v->arch.hvm_vcpu.hw_cr[0] & ~(X86_CR0_PG | X86_CR0_TS);
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_TS;
+ hvm_update_guest_cr(v, 0);
- /* Guest CR4. */
- v->arch.hvm_vcpu.guest_cr[4] =
- read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);
paging_update_paging_modes(v);
- vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0];
vmcb->np_enable = 1; /* enable nested paging */
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = v->arch.hvm_vcpu.guest_cr[4] =
- HVM_CR4_HOST_MASK & ~X86_CR4_PAE;
vmcb->exception_intercepts = HVM_TRAP_MASK;
/* No point in intercepting CR3/4 reads, because the hardware
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 22242b4fec..0795420820 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -315,34 +315,69 @@ void vmx_cpu_down(void)
local_irq_restore(flags);
}
+struct foreign_vmcs {
+ struct vcpu *v;
+ unsigned int count;
+};
+static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
+
void vmx_vmcs_enter(struct vcpu *v)
{
+ struct foreign_vmcs *fv;
+
/*
* NB. We must *always* run an HVM VCPU on its own VMCS, except for
* vmx_vmcs_enter/exit critical regions.
*/
- if ( v == current )
+ if ( likely(v == current) )
return;
- vcpu_pause(v);
- spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+ fv = &this_cpu(foreign_vmcs);
- vmx_clear_vmcs(v);
- vmx_load_vmcs(v);
+ if ( fv->v == v )
+ {
+ BUG_ON(fv->count == 0);
+ }
+ else
+ {
+ BUG_ON(fv->v != NULL);
+ BUG_ON(fv->count != 0);
+
+ vcpu_pause(v);
+ spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+
+ vmx_clear_vmcs(v);
+ vmx_load_vmcs(v);
+
+ fv->v = v;
+ }
+
+ fv->count++;
}
void vmx_vmcs_exit(struct vcpu *v)
{
- if ( v == current )
+ struct foreign_vmcs *fv;
+
+ if ( likely(v == current) )
return;
- /* Don't confuse vmx_do_resume (for @v or @current!) */
- vmx_clear_vmcs(v);
- if ( is_hvm_vcpu(current) )
- vmx_load_vmcs(current);
+ fv = &this_cpu(foreign_vmcs);
+ BUG_ON(fv->v != v);
+ BUG_ON(fv->count == 0);
+
+ if ( --fv->count == 0 )
+ {
+ /* Don't confuse vmx_do_resume (for @v or @current!) */
+ vmx_clear_vmcs(v);
+ if ( is_hvm_vcpu(current) )
+ vmx_load_vmcs(current);
+
+ spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
+ vcpu_unpause(v);
- spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
- vcpu_unpause(v);
+ fv->v = NULL;
+ }
}
struct xgt_desc {
@@ -380,7 +415,6 @@ static void vmx_set_host_env(struct vcpu *v)
static void construct_vmcs(struct vcpu *v)
{
- unsigned long cr0, cr4;
union vmcs_arbytes arbytes;
vmx_vmcs_enter(v);
@@ -504,19 +538,11 @@ static void construct_vmcs(struct vcpu *v)
__vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
- /* Guest CR0. */
- cr0 = read_cr0();
- v->arch.hvm_vcpu.hw_cr[0] = cr0;
- __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
- v->arch.hvm_vcpu.guest_cr[0] = cr0 & ~(X86_CR0_PG | X86_CR0_TS);
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
-
- /* Guest CR4. */
- cr4 = read_cr4();
- __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
- v->arch.hvm_vcpu.guest_cr[4] =
- cr4 & ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
- __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
+ hvm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);
if ( cpu_has_vmx_tpr_shadow )
{
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 72062dfa90..0d98626c36 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -963,6 +963,9 @@ static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
}
reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
+ /* Unusable flag is folded into Present flag. */
+ if ( attr & (1u<<16) )
+ reg->attr.fields.p = 0;
}
/* Make sure that xen intercepts any FP accesses from current */
@@ -1062,7 +1065,9 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
switch ( cr )
{
case 0:
- v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_PE | X86_CR0_NE;
+ v->arch.hvm_vcpu.hw_cr[0] =
+ v->arch.hvm_vcpu.guest_cr[0] |
+ X86_CR0_PE | X86_CR0_NE | X86_CR0_PG | X86_CR0_WP;
__vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
break;
@@ -1073,6 +1078,8 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
break;
case 4:
+ v->arch.hvm_vcpu.hw_cr[4] =
+ v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
__vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
break;
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index 990792e14a..c4ea85c189 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -33,7 +33,12 @@ struct hvm_vcpu {
unsigned long guest_cr[5];
unsigned long guest_efer;
- /* Processor-visible control-register values, while guest executes. */
+ /*
+ * Processor-visible control-register values, while guest executes.
+ * CR0, CR4: Used as a cache of VMCS contents by VMX only.
+ * CR1, CR2: Never used (guest_cr[2] is always processor-visible CR2).
+ * CR3: Always used and kept up to date by paging subsystem.
+ */
unsigned long hw_cr[5];
struct hvm_io_op io_op;