/*
* svm.c: handling SVM architecture-related VM exits
* Copyright (c) 2004, Intel Corporation.
* Copyright (c) 2005, AMD Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
*/
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/hvm/io.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/hvm/svm/emulate.h>
#include <asm/hvm/svm/vmmcall.h>
#include <asm/hvm/svm/intr.h>
#include <public/sched.h>
#define SVM_EXTRA_DEBUG
#define set_segment_register(name, value) \
__asm__ __volatile__ ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
/* External functions. We should move these to some suitable header file(s) */
extern void do_nmi(struct cpu_user_regs *, unsigned long);
extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
int inst_len);
extern asmlinkage void do_IRQ(struct cpu_user_regs *);
extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
unsigned long count, int size, long value, int dir, int pvalid);
extern int svm_instrlen(struct cpu_user_regs *regs, int mode);
extern void svm_dump_inst(unsigned long eip);
extern int svm_dbg_on;
void svm_dump_regs(const char *from, struct cpu_user_regs *regs);
static void svm_relinquish_guest_resources(struct domain *d);
static int svm_do_vmmcall_reset_to_realmode(struct vcpu *v,
struct cpu_user_regs *regs);
/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;
/* vmcb used for extended host state */
static void *root_vmcb[NR_CPUS] __read_mostly;
/* physical address of above for host VMSAVE/VMLOAD */
u64 root_vmcb_pa[NR_CPUS] __read_mostly;
/* ASID API */
enum {
ASID_AVAILABLE = 0,
ASID_INUSE,
ASID_RETIRED
};
#define INITIAL_ASID 0
#define ASID_MAX 64
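/*
 * Background note: ASIDs (address space identifiers) tag TLB entries so
 * that host and guest translations can coexist in the TLB without a full
 * flush on every VMRUN/#VMEXIT.  ASID 0 is used by the host (hence
 * INITIAL_ASID is permanently marked in use), so guests are handed ASIDs
 * from the range 1 .. ASID_MAX-1 of the per-core pool below.
 */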
struct asid_pool {
spinlock_t asid_lock;
u32 asid[ASID_MAX];
};
static DEFINE_PER_CPU(struct asid_pool, asid_pool);
/*
 * Initialize the per-core pool of ASIDs handed out to guests.
 */
void asidpool_init(int core)
{
int i;
spin_lock_init(&per_cpu(asid_pool,core).asid_lock);
/* Host ASID is always in use */
per_cpu(asid_pool,core).asid[INITIAL_ASID] = ASID_INUSE;
for ( i = 1; i < ASID_MAX; i++ )
per_cpu(asid_pool,core).asid[i] = ASID_AVAILABLE;
}
/*
 * Internal helper: fetch the next available ASID and mark it in use.
 * The caller must hold the pool's asid_lock.
 */
static int asidpool_fetch_next(struct vmcb_struct *vmcb, int core)
{
int i;
for ( i = 1; i < ASID_MAX; i++ )
{
if ( per_cpu(asid_pool,core).asid[i] == ASID_AVAILABLE )
{
vmcb->guest_asid = i;
per_cpu(asid_pool,core).asid[i] = ASID_INUSE;
return i;
}
}
return -1;
}
/*
 * This function assigns the next available ASID number to the passed
 * VMCB. If none are available, the TLB flush flag is set and all
 * retired ASIDs are made available again.
 *
 * Returns: 1 -- success;
 *          0 -- failure -- no more ASID numbers available.
 */
int asidpool_assign_next( struct vmcb_struct *vmcb, int retire_current,
int oldcore, int newcore )
{
int i;
int res = 1;
static unsigned long cnt = 0;
spin_lock(&per_cpu(asid_pool,oldcore).asid_lock);
if( retire_current && vmcb->guest_asid ) {
per_cpu(asid_pool,oldcore).asid[vmcb->guest_asid & (ASID_MAX-1)] =
ASID_RETIRED;
}
spin_unlock(&per_cpu(asid_pool,oldcore).asid_lock);
spin_lock(&per_cpu(asid_pool,newcore).asid_lock);
if( asidpool_fetch_next( vmcb, newcore ) < 0 ) {
if (svm_dbg_on)
printk( "SVM: tlb(%ld)\n", cnt++ );
/* Flush the TLB and make all retired slots available again. */
vmcb->tlb_control = 1;
for( i = 1; i < ASID_MAX; i++ ) {
if( per_cpu(asid_pool,newcore).asid[i] == ASID_RETIRED ) {
per_cpu(asid_pool,newcore).asid[i] = ASID_AVAILABLE;
}
}
/* Get the first available slot. */
res = asidpool_fetch_next( vmcb, newcore ) > 0;
}
spin_unlock(&per_cpu(asid_pool,newcore).asid_lock);
return res;
}
void asidpool_retire( struct vmcb_struct *vmcb, int core )
{
spin_lock(&per_cpu(asid_pool,core).asid_lock);
if( vmcb->guest_asid ) {
per_cpu(asid_pool,core).asid[vmcb->guest_asid & (ASID_MAX-1)] =
ASID_RETIRED;
}
spin_unlock(&per_cpu(asid_pool,core).asid_lock);
}
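/*
 * Illustrative use of the ASID pool (the real callers live in the VMCB
 * setup and context-switch paths, not in this file; the variable names
 * here are only for the sketch).  When a VCPU may next run on a different
 * core, its old ASID is retired on the old core and a fresh one assigned
 * on the new core; if the new core's pool is empty, asidpool_assign_next()
 * sets the VMCB's tlb_control flag (forcing a TLB flush on the next VMRUN)
 * and recycles every retired slot before retrying:
 *
 *     asidpool_assign_next(v->arch.hvm_svm.vmcb, 1, oldcore, newcore);
 *
 * A zero return means the pool is exhausted even after recycling.
 */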
static inline void svm_inject_exception(struct vcpu *v, int trap,
int ev, int error_code)
{
eventinj_t event;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
event.bytes = 0;
event.fields.v = 1;
event.fields.type = EVENTTYPE_EXCEPTION;
event.fields.vector = trap;
event.fields.ev = ev;
event.fields.errorcode = error_code;
ASSERT(vmcb->eventinj.fields.v == 0);
vmcb->eventinj = event;
}
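/*
 * Example: the MSR handlers below inject a general protection fault with
 * an error code of 0 via
 *
 *     svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
 *
 * This fills the VMCB's EVENTINJ field (which must be empty beforehand,
 * hence the ASSERT above); the processor delivers the exception to the
 * guest on the next VMRUN.
 */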
static void stop_svm(void)
{
u32 eax, edx;
int cpu = smp_processor_id();
/* We turn off the EFER_SVME bit. */
rdmsr(MSR_EFER, eax, edx);
eax &= ~EFER_SVME;
wrmsr(MSR_EFER, eax, edx);
/* release the HSA */
free_host_save_area(hsa[cpu]);
hsa[cpu] = NULL;
wrmsr(MSR_K8_VM_HSAVE_PA, 0, 0 );
/* free up the root vmcb */
free_vmcb(root_vmcb[cpu]);
root_vmcb[cpu] = NULL;
root_vmcb_pa[cpu] = 0;
printk("AMD SVM Extension is disabled.\n");
}
static void svm_store_cpu_guest_regs(
struct vcpu *v, struct cpu_user_regs *regs, unsigned long *crs)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
if ( regs != NULL )
{
regs->eip = vmcb->rip;
regs->esp = vmcb->rsp;
regs->eflags = vmcb->rflags;
regs->cs = vmcb->cs.sel;
regs->ds = vmcb->ds.sel;
regs->es = vmcb->es.sel;
regs->ss = vmcb->ss.sel;
regs->gs = vmcb->gs.sel;
regs->fs = vmcb->fs.sel;
}
if ( crs != NULL )
{
/* Return the guest's control registers. */
crs[0] = v->arch.hvm_svm.cpu_shadow_cr0;
crs[2] = v->arch.hvm_svm.cpu_cr2;
crs[3] = v->arch.hvm_svm.cpu_cr3;
crs[4] = v->arch.hvm_svm.cpu_shadow_cr4;
}
}
static int svm_paging_enabled(struct vcpu *v)
{
unsigned long cr0;
cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
}
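/*
 * Canonical-address checking is not implemented yet: the stub below
 * accepts every address.  A real check would verify that bits 63:48 of
 * the value are a sign-extension of bit 47 (for 48-bit virtual addresses).
 */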
#define IS_CANO_ADDRESS(add) 1
static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
{
u64 msr_content = 0;
struct vcpu *vc = current;
struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
switch (regs->ecx)
{
case MSR_EFER:
msr_content = vmcb->efer;
msr_content &= ~EFER_SVME;
break;
case MSR_FS_BASE:
msr_content = vmcb->fs.base;
break;
case MSR_GS_BASE:
msr_content = vmcb->gs.base;
break;
case MSR_SHADOW_GS_BASE:
msr_content = vmcb->kerngsbase;
break;
case MSR_STAR:
msr_content = vmcb->star;
break;
case MSR_LSTAR:
msr_content = vmcb->lstar;
break;
case MSR_CSTAR:
msr_content = vmcb->cstar;
break;
case MSR_SYSCALL_MASK:
msr_content = vmcb->sfmask;
break;
default:
return 0;
}
HVM_DBG_LOG(DBG_LEVEL_2, "mode_do_msr_read: msr_content: %"PRIx64"\n",
msr_content);
regs->eax = msr_content & 0xffffffff;
regs->edx = msr_content >> 32;
return 1;
}
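/*
 * RDMSR hands the 64-bit MSR value back to the guest split across
 * EDX:EAX.  For example, an LSTAR value of 0xffff800000001000 (the value
 * is just an illustration) is returned as EAX = 0x00001000 and
 * EDX = 0xffff8000.  long_mode_do_msr_write() below reassembles the
 * value from the guest's EDX:EAX in the same way.
 */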
static inline int long_mode_do_msr_write(struct cpu_user_regs *regs)
{
u64 msr_content = regs->eax | ((u64)regs->edx << 32);
struct vcpu *vc = current;
struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;
HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx "
"msr_content %"PRIx64"\n",
(unsigned long)regs->ecx, msr_content);
switch (regs->ecx)
{
case MSR_EFER:
#ifdef __x86_64__
/* Setting any reserved bit causes #GP to be injected. */
if ( msr_content & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE) )
{
printk("trying to set reserved bit in EFER\n");
svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
return 0;
}
/* LME: 0 -> 1 */
if ( msr_content & EFER_LME &&
!test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
{
if ( svm_paging_enabled(vc) ||
!test_bit(SVM_CPU_STATE_PAE_ENABLED,
&vc->arch.hvm_svm.cpu_state) )
{
printk("trying to set LME bit when "
"in paging mode or PAE bit is not set\n");
svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
return 0;
}
set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);
}
/* We have already recorded that we want LME, so it will be set
* next time CR0 gets updated. So we clear that bit and continue.
*/
if ((msr_content ^ vmcb->efer) & EFER_LME)
msr_content &= ~EFER_LME;
/* No update for LME/LMA since it has no effect here. */
#endif
vmcb->efer = msr_content | EFER_SVME;
break;
case MSR_FS_BASE:
case MSR_GS_BASE:
if (!(SVM_LONG_GUEST(vc)))
domain_crash_synchronous();
if (!IS_CANO_ADDRESS(msr_content))
{
HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
svm_inject_exception(vc, TRAP_gp_fault, 1, 0);
}
if (regs->ecx == MSR_FS_BASE)
vmcb->fs.base = msr_content;
else
vmcb->gs.base = msr_content;
break;
case MSR_SHADOW_GS_BASE:
vmcb->kerngsbase = msr_content;
break;
case MSR_STAR:
vmcb->star = msr_content;
break;
case MSR_LSTAR:
vmcb->lstar = msr_content;
break;
case MSR_CSTAR:
vmcb->cstar = msr_content;
break;
case MSR_SYSCALL_MASK:
vmcb->sfmask = msr_content;
break;
default:
return 0;
}
return 1;
}
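/*
 * Note that EFER.SVME is hidden from the guest: long_mode_do_msr_read()
 * masks it out of the reported value, and the write path above ORs it
 * back in unconditionally, because VMRUN's consistency checks require
 * EFER.SVME to be set in the VMCB.
 */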
#define loaddebug(_v,_reg) \
__asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
#define savedebug(_v,_reg) \
__asm__ __volatile__ ("mov %%db" #_reg ",%0" : "=r" ((_v)->debugreg[_reg]))
static inline void svm_save_dr(struct vcpu *v)
{
if (v->arch.hvm_vcpu.flag_dr_dirty)
{
/* clear the DR dirty flag and re-enable intercepts for DR accesses */
v->arch.hvm_vcpu.flag_dr_dirty = 0;
v->arch.hvm_svm.vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
savedebug(&v->arch.guest_context, 0);
savedebug(&v->arch.guest_context, 1);
savedebug(&v->arch.guest_context, 2);
savedebug(&v->arch.guest_context, 3);
}
}
static inline void __restore_debug_registers(struct vcpu *v)
{
loaddebug(&v->arch.guest_context, 0);
loaddebug(&v->arch.guest_context, 1);
loaddebug(&v->arch.guest_context, 2);
loaddebug(&v->arch.guest_context, 3);
}
static inline void svm_restore_dr(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
if (!vmcb)
return;
if (unlikely(vmcb->dr7 & 0xFF))
__restore_debug_registers(v);
}
static int svm_realmode(struct vcpu *v)
{
unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
}
int svm_guest_x86_mode(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
/* Check which operating mode the guest is running in. */
if( vmcb->efer & EFER_LMA )
mode = vmcb->cs.attributes.fields.l ? 8 : 4;
else
mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
return mode;
}
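/*
 * The returned mode is the guest's current operand/address size in bytes:
 * 2 for real or VM86 mode, 4 for 32-bit protected mode (or a long-mode
 * compatibility segment with CS.L clear), and 8 for a 64-bit code segment
 * (EFER.LMA and CS.L both set).
 */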
int svm_instruction_length(struct vcpu *v)
{
return svm_instrlen(guest_cpu_user_regs(), svm_guest_x86_mode(v));
}
void svm_update_host_cr3(struct vcpu *v)
{
/* SVM doesn't have a HOST_CR3 equivalent to update. */
}
unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
{
switch ( num )
{
case 0:
return v->arch.hvm_svm.cpu_shadow_cr0;
case 2:
return v->arch.hvm_svm.cpu_cr2;
case 3:
return v->arch.hvm_svm.cpu_cr3;
case 4:
return v->arch.hvm_svm.cpu_shadow_cr4;
default:
BUG();
}
return 0; /* dummy */
}
/* Make sure that Xen intercepts any FPU accesses from the current vcpu. */
static void svm_stts(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
/*
* If the guest does not have TS enabled then we must cause and handle an
* exception on first use of the FPU. If the guest *does* have TS enabled
* then this is not necessary: no FPU activity can occur until the guest
* clears CR0.TS, and we will initialise the FPU when that happens.
*/
if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) )
{
v->arch.hvm_svm.vmcb->exception_intercepts |= EXCEPTION_BITMAP_NM;
vmcb->cr0 |= X86_CR0_TS;
}
}
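/*
 * This is the usual lazy FPU-switching scheme: the #NM intercept handler
 * elsewhere in this file undoes the above on first FPU use -- it restores
 * the guest's FPU state, clears the CR0.TS we forced into the VMCB, and
 * drops the EXCEPTION_BITMAP_NM intercept again.
 */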
static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
{
v->arch.hvm_svm.vmcb->tsc_offset = offset;
}
/* SVM-specific initialization code for VCPU application processors. */
static void svm_init_ap_context(struct vcpu_guest_context *ctxt,
int vcpuid, int trampoline_vector)
{
int i;
struct vcpu *v, *bsp = current;
struct domain *d = bsp->domain;
cpu_user_regs_t *regs;
if ((v = d->vcpu[vcpuid]) == NULL)
{
printk("vcpuid %d is invalid! good-bye.\n", vcpuid);
domain_crash_synchronous();
}
regs = &v->arch.guest_context.user_regs;
memset(ctxt, 0, sizeof(*ctxt));
for (i = 0; i < 256; ++i)
{
ctxt->trap_ctxt[i].vector = i;