From ba2a198fc70dc65dcb7f3a54cd82084da0e2dfa6 Mon Sep 17 00:00:00 2001 From: "kfraser@localhost.localdomain" Date: Wed, 8 Aug 2007 17:28:13 +0100 Subject: hvm: Handle hw_cr[] array a bit more sanely. SVM for the most part does not need to use it at all, and this makes the code clearer. Signed-off-by: Keir Fraser --- xen/arch/x86/hvm/hvm.c | 7 ---- xen/arch/x86/hvm/svm/svm.c | 86 ++++++++++++++---------------------------- xen/arch/x86/hvm/svm/vmcb.c | 17 ++------- xen/arch/x86/hvm/vmx/vmcs.c | 78 +++++++++++++++++++++++++------------- xen/arch/x86/hvm/vmx/vmx.c | 9 ++++- xen/include/asm-x86/hvm/vcpu.h | 7 +++- 6 files changed, 98 insertions(+), 106 deletions(-) diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index b72420e8ad..b8a249de59 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -596,9 +596,6 @@ int hvm_set_cr0(unsigned long value) } v->arch.hvm_vcpu.guest_cr[0] = value; - v->arch.hvm_vcpu.hw_cr[0] = value; - if ( !paging_mode_hap(v->domain) ) - v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_PG | X86_CR0_WP; hvm_update_guest_cr(v, 0); if ( (value ^ old_value) & X86_CR0_PG ) @@ -672,10 +669,6 @@ int hvm_set_cr4(unsigned long value) old_cr = v->arch.hvm_vcpu.guest_cr[4]; v->arch.hvm_vcpu.guest_cr[4] = value; - v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; - if ( paging_mode_hap(v->domain) ) - v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE; - v->arch.hvm_vcpu.hw_cr[4] |= value; hvm_update_guest_cr(v, 4); /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */ diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 22bef7c551..5a23e7cbfb 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -59,8 +59,9 @@ int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len); asmlinkage void do_IRQ(struct cpu_user_regs *); -static int svm_reset_to_realmode(struct vcpu *v, - struct cpu_user_regs *regs); +static int svm_reset_to_realmode( + struct vcpu *v, struct cpu_user_regs *regs); +static void svm_update_guest_cr(struct vcpu *v, unsigned int cr); /* va of hardware host save area */ static void *hsa[NR_CPUS] __read_mostly; @@ -343,48 +344,26 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c) vmcb->rsp = c->rsp; vmcb->rflags = c->rflags; - v->arch.hvm_vcpu.guest_cr[0] = c->cr0; - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = - c->cr0 | X86_CR0_WP | X86_CR0_ET | X86_CR0_PG; + v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET; + svm_update_guest_cr(v, 0); v->arch.hvm_vcpu.guest_cr[2] = c->cr2; + svm_update_guest_cr(v, 2); + v->arch.hvm_vcpu.guest_cr[4] = c->cr4; + svm_update_guest_cr(v, 4); + #ifdef HVM_DEBUG_SUSPEND printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", - __func__, - c->cr3, - c->cr0, - c->cr4); + __func__, c->cr3, c->cr0, c->cr4); #endif - if ( !hvm_paging_enabled(v) ) - { - printk("%s: paging not enabled.\n", __func__); - goto skip_cr3; - } - - if ( c->cr3 == v->arch.hvm_vcpu.guest_cr[3] ) + if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) ) { - /* - * This is simple TLB flush, implying the guest has - * removed some translation or changed page attributes. - * We simply invalidate the shadow. - */ - mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); - if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) - goto bad_cr3; - } - else - { - /* - * If different, make a shadow. Check if the PDBR is valid - * first. - */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3); mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) goto bad_cr3; - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); if (old_base_mfn) @@ -392,10 +371,6 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c) v->arch.hvm_vcpu.guest_cr[3] = c->cr3; } - skip_cr3: - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = c->cr4 | HVM_CR4_HOST_MASK; - v->arch.hvm_vcpu.guest_cr[4] = c->cr4; - vmcb->idtr.limit = c->idtr_limit; vmcb->idtr.base = c->idtr_base; @@ -449,10 +424,6 @@ int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c) if ( paging_mode_hap(v->domain) ) { - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0]; - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = - v->arch.hvm_vcpu.guest_cr[4] | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); - vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3] = c->cr3; vmcb->np_enable = 1; vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); @@ -586,17 +557,22 @@ static void svm_update_guest_cr(struct vcpu *v, unsigned int cr) switch ( cr ) { case 0: - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0]; + vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0]; + if ( !paging_mode_hap(v->domain) ) + vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP; break; case 2: - vmcb->cr2 = v->arch.hvm_vcpu.hw_cr[2]; + vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2]; break; case 3: vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3]; svm_asid_inv_asid(v); break; case 4: - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4]; + vmcb->cr4 = HVM_CR4_HOST_MASK; + if ( paging_mode_hap(v->domain) ) + vmcb->cr4 &= ~X86_CR4_PAE; + vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4]; break; default: BUG(); @@ -724,7 +700,7 @@ static void svm_stts(struct vcpu *v) if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device; - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS; + vmcb->cr0 |= X86_CR0_TS; } } @@ -1045,7 +1021,7 @@ static void svm_do_no_device_fault(struct vmcb_struct *vmcb) vmcb->exception_intercepts &= ~(1U << TRAP_no_device); if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; + vmcb->cr0 &= ~X86_CR0_TS; } /* Reserved bits ECX: [31:14], [12:4], [2:1]*/ @@ -1774,7 +1750,7 @@ static void svm_cr_access( /* TS being cleared means that it's time to restore fpu state. */ setup_fpu(current); vmcb->exception_intercepts &= ~(1U << TRAP_no_device); - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; /* clear TS */ + vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */ v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */ break; @@ -2085,22 +2061,16 @@ static int svm_reset_to_realmode(struct vcpu *v, memset(regs, 0, sizeof(struct cpu_user_regs)); - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = - X86_CR0_ET | X86_CR0_PG | X86_CR0_WP; v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET; + svm_update_guest_cr(v, 0); - vmcb->cr2 = 0; - vmcb->efer = EFER_SVME; + v->arch.hvm_vcpu.guest_cr[2] = 0; + svm_update_guest_cr(v, 2); - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK; v->arch.hvm_vcpu.guest_cr[4] = 0; + svm_update_guest_cr(v, 4); - if ( paging_mode_hap(v->domain) ) - { - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0]; - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = - v->arch.hvm_vcpu.guest_cr[4] | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); - } + vmcb->efer = EFER_SVME; /* This will jump to ROMBIOS */ vmcb->rip = 0xFFF0; @@ -2231,7 +2201,7 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) unsigned long va; va = vmcb->exitinfo2; regs->error_code = vmcb->exitinfo1; - HVM_DBG_LOG(DBG_LEVEL_VMMU, + HVM_DBG_LOG(DBG_LEVEL_VMMU, "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", (unsigned long)regs->eax, (unsigned long)regs->ebx, (unsigned long)regs->ecx, (unsigned long)regs->edx, diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c index f532ae6ae1..e704747393 100644 --- a/xen/arch/x86/hvm/svm/vmcb.c +++ b/xen/arch/x86/hvm/svm/vmcb.c @@ -216,28 +216,19 @@ static int construct_vmcb(struct vcpu *v) vmcb->tr.base = 0; vmcb->tr.limit = 0xff; - /* Guest CR0. */ - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = read_cr0(); - v->arch.hvm_vcpu.guest_cr[0] = - v->arch.hvm_vcpu.hw_cr[0] & ~(X86_CR0_PG | X86_CR0_TS); + v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_TS; + hvm_update_guest_cr(v, 0); - /* Guest CR4. */ - v->arch.hvm_vcpu.guest_cr[4] = - read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = - v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK; + v->arch.hvm_vcpu.guest_cr[4] = 0; + hvm_update_guest_cr(v, 4); paging_update_paging_modes(v); - vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3]; if ( paging_mode_hap(v->domain) ) { - vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0]; vmcb->np_enable = 1; /* enable nested paging */ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); - vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = v->arch.hvm_vcpu.guest_cr[4] = - HVM_CR4_HOST_MASK & ~X86_CR4_PAE; vmcb->exception_intercepts = HVM_TRAP_MASK; /* No point in intercepting CR3/4 reads, because the hardware diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index 22242b4fec..0795420820 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -315,34 +315,69 @@ void vmx_cpu_down(void) local_irq_restore(flags); } +struct foreign_vmcs { + struct vcpu *v; + unsigned int count; +}; +static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs); + void vmx_vmcs_enter(struct vcpu *v) { + struct foreign_vmcs *fv; + /* * NB. We must *always* run an HVM VCPU on its own VMCS, except for * vmx_vmcs_enter/exit critical regions. */ - if ( v == current ) + if ( likely(v == current) ) return; - vcpu_pause(v); - spin_lock(&v->arch.hvm_vmx.vmcs_lock); + fv = &this_cpu(foreign_vmcs); - vmx_clear_vmcs(v); - vmx_load_vmcs(v); + if ( fv->v == v ) + { + BUG_ON(fv->count == 0); + } + else + { + BUG_ON(fv->v != NULL); + BUG_ON(fv->count != 0); + + vcpu_pause(v); + spin_lock(&v->arch.hvm_vmx.vmcs_lock); + + vmx_clear_vmcs(v); + vmx_load_vmcs(v); + + fv->v = v; + } + + fv->count++; } void vmx_vmcs_exit(struct vcpu *v) { - if ( v == current ) + struct foreign_vmcs *fv; + + if ( likely(v == current) ) return; - /* Don't confuse vmx_do_resume (for @v or @current!) */ - vmx_clear_vmcs(v); - if ( is_hvm_vcpu(current) ) - vmx_load_vmcs(current); + fv = &this_cpu(foreign_vmcs); + BUG_ON(fv->v != v); + BUG_ON(fv->count == 0); + + if ( --fv->count == 0 ) + { + /* Don't confuse vmx_do_resume (for @v or @current!) */ + vmx_clear_vmcs(v); + if ( is_hvm_vcpu(current) ) + vmx_load_vmcs(current); + + spin_unlock(&v->arch.hvm_vmx.vmcs_lock); + vcpu_unpause(v); - spin_unlock(&v->arch.hvm_vmx.vmcs_lock); - vcpu_unpause(v); + fv->v = NULL; + } } struct xgt_desc { @@ -380,7 +415,6 @@ static void vmx_set_host_env(struct vcpu *v) static void construct_vmcs(struct vcpu *v) { - unsigned long cr0, cr4; union vmcs_arbytes arbytes; vmx_vmcs_enter(v); @@ -504,19 +538,11 @@ static void construct_vmcs(struct vcpu *v) __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault)); - /* Guest CR0. */ - cr0 = read_cr0(); - v->arch.hvm_vcpu.hw_cr[0] = cr0; - __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); - v->arch.hvm_vcpu.guest_cr[0] = cr0 & ~(X86_CR0_PG | X86_CR0_TS); - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); - - /* Guest CR4. */ - cr4 = read_cr4(); - __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE); - v->arch.hvm_vcpu.guest_cr[4] = - cr4 & ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE); - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); + v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET; + hvm_update_guest_cr(v, 0); + + v->arch.hvm_vcpu.guest_cr[4] = 0; + hvm_update_guest_cr(v, 4); if ( cpu_has_vmx_tpr_shadow ) { diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 72062dfa90..0d98626c36 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -963,6 +963,9 @@ static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg, } reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00); + /* Unusable flag is folded into Present flag. */ + if ( attr & (1u<<16) ) + reg->attr.fields.p = 0; } /* Make sure that xen intercepts any FP accesses from current */ @@ -1062,7 +1065,9 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) switch ( cr ) { case 0: - v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_PE | X86_CR0_NE; + v->arch.hvm_vcpu.hw_cr[0] = + v->arch.hvm_vcpu.guest_cr[0] | + X86_CR0_PE | X86_CR0_NE | X86_CR0_PG | X86_CR0_WP; __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); break; @@ -1073,6 +1078,8 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]); break; case 4: + v->arch.hvm_vcpu.hw_cr[4] = + v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK; __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); break; diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index 990792e14a..c4ea85c189 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -33,7 +33,12 @@ struct hvm_vcpu { unsigned long guest_cr[5]; unsigned long guest_efer; - /* Processor-visible control-register values, while guest executes. */ + /* + * Processor-visible control-register values, while guest executes. + * CR0, CR4: Used as a cache of VMCS contents by VMX only. + * CR1, CR2: Never used (guest_cr[2] is always processor-visible CR2). + * CR3: Always used and kept up to date by paging subsystem. + */ unsigned long hw_cr[5]; struct hvm_io_op io_op; -- cgit v1.2.3