/******************************************************************************
* arch/x86/domain.c
*
* x86-specific domain handling (e.g., register setup and context switching).
*/
/*
* Copyright (C) 1995 Linus Torvalds
*
* Pentium III FXSR, SSE support
* Gareth Hughes <gareth@valinux.com>, May 2000
*/
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
#include <xen/iocap.h>
#include <xen/kernel.h>
#include <xen/multicall.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/console.h>
#include <xen/percpu.h>
#include <xen/compat.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
#include <asm/paging.h>
#include <asm/hypercall.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/iommu.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
#endif
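
/*
 * Per-CPU context-switch state: the vCPU whose register state was last
 * loaded on this CPU (this may differ from 'current' while the idle vCPU
 * runs, due to lazy context switching), plus per-CPU copies of the EFER
 * and CR4 values currently programmed on this CPU.
 */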
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
DEFINE_PER_CPU(u64, efer);
DEFINE_PER_CPU(unsigned long, cr4);
static void unmap_vcpu_info(struct vcpu *v);
static void paravirt_ctxt_switch_from(struct vcpu *v);
static void paravirt_ctxt_switch_to(struct vcpu *v);
static void vcpu_destroy_pagetables(struct vcpu *v);
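
/*
 * schedule_tail() handlers: entered on the new vCPU's stack after a context
 * switch.  They discard the stale stack frame and resume either in the idle
 * loop or on the return-from-interrupt path back into the guest.
 */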
static void continue_idle_domain(struct vcpu *v)
{
reset_stack_and_jump(idle_loop);
}
static void continue_nonidle_domain(struct vcpu *v)
{
reset_stack_and_jump(ret_from_intr);
}
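
/*
 * Halt until the next interrupt unless softirq work is already pending.
 * Interrupts are disabled across the check so that a wakeup cannot be lost
 * between testing for softirqs and halting; safe_halt() re-enables them as
 * it executes HLT.
 */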
static void default_idle(void)
{
local_irq_disable();
if ( !softirq_pending(smp_processor_id()) )
safe_halt();
else
local_irq_enable();
}
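
/*
 * Final resting place for an offlined CPU: tear down its interrupt and HVM
 * state, flush caches, acknowledge the offline request by marking itself
 * CPU_DEAD, and then halt forever.
 */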
static void play_dead(void)
{
__cpu_disable();
/* This must be done before dead CPU ack */
cpu_exit_clear();
hvm_cpu_down();
wbinvd();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
/* With physical CPU hotplug, we should halt the cpu. */
local_irq_disable();
for ( ; ; )
halt();
}
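
/*
 * Per-CPU idle loop: offline this CPU if it has been requested to die,
 * otherwise schedule page-scrub work, halt until something needs doing,
 * and process any pending softirqs.
 */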
void idle_loop(void)
{
for ( ; ; )
{
        if ( cpu_is_offline(smp_processor_id()) )
play_dead();
page_scrub_schedule_work();
default_idle();
do_softirq();
}
}
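
/*
 * Entered once per CPU at bring-up: record this CPU in the idle domain's and
 * idle vCPU's dirty cpumasks, then enter the idle loop on a fresh stack.
 */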
void startup_cpu_idle_loop(void)
{
struct vcpu *v = current;
ASSERT(is_idle_vcpu(v));
cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
reset_stack_and_jump(idle_loop);
}
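
/*
 * Debug helper: print the pages owned by a domain.  The general page list is
 * walked only for small domains; Xen-heap pages are always listed.
 */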
void dump_pageframe_info(struct domain *d)
{
struct page_info *page;
printk("Memory pages belonging to domain %u:\n", d->domain_id);
if ( d->tot_pages >= 10 )
{
printk(" DomPage list too long to display\n");
}
else
{
list_for_each_entry ( page, &d->page_list, list )
{
printk(" DomPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_maddr(page)), _p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
}
list_for_each_entry ( page, &d->xenpage_list, list )
{
printk(" XenPage %p: mfn=%p, caf=%08x, taf=%" PRtype_info "\n",
_p(page_to_maddr(page)), _p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
}
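
/* Allocate and zero a vcpu structure; released by free_vcpu_struct(). */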
struct vcpu *alloc_vcpu_struct(void)
{
struct vcpu *v;
if ( (v = xmalloc(struct vcpu)) != NULL )
memset(v, 0, sizeof(*v));
return v;
}
void free_vcpu_struct(struct vcpu *v)
{
xfree(v);
}
#ifdef CONFIG_COMPAT
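
/*
 * Set up the hypercall argument translation area for a 32-on-64 vCPU: point
 * the given L4 slot at the domain's shared translation L3 (allocating it and
 * any intermediate page tables on demand) and back each of this vCPU's
 * COMPAT_ARG_XLAT_PAGES slots with a freshly allocated page.
 */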
int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
{
struct domain *d = v->domain;
unsigned i;
struct page_info *pg;
if ( !d->arch.mm_arg_xlat_l3 )
{
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
clear_page(d->arch.mm_arg_xlat_l3);
}
l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
l4e_from_paddr(__pa(d->arch.mm_arg_xlat_l3), __PAGE_HYPERVISOR);
for ( i = 0; i < COMPAT_ARG_XLAT_PAGES; ++i )
{
unsigned long va = COMPAT_ARG_XLAT_VIRT_START(v->vcpu_id) + i * PAGE_SIZE;
l2_pgentry_t *l2tab;
l1_pgentry_t *l1tab;
if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
{
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
clear_page(page_to_virt(pg));
d->arch.mm_arg_xlat_l3[l3_table_offset(va)] = l3e_from_page(pg, __PAGE_HYPERVISOR);
}
l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
{
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
clear_page(page_to_virt(pg));
l2tab[l2_table_offset(va)] = l2e_from_page(pg, __PAGE_HYPERVISOR);
}
l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
}
return 0;
}
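
/*
 * Tear down the argument translation area (e.g. when a domain switches back
 * to native mode): walk the L3/L2/L1 hierarchy, free every mapped page and
 * intermediate page table, then free the L3 page itself.
 */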
static void release_arg_xlat_area(struct domain *d)
{
if ( d->arch.mm_arg_xlat_l3 )
{
unsigned l3;
for ( l3 = 0; l3 < L3_PAGETABLE_ENTRIES; ++l3 )
{
if ( l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3]) )
{
l2_pgentry_t *l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3]);
unsigned l2;
for ( l2 = 0; l2 < L2_PAGETABLE_ENTRIES; ++l2 )
{
if ( l2e_get_intpte(l2tab[l2]) )
{
l1_pgentry_t *l1tab = l2e_to_l1e(l2tab[l2]);
unsigned l1;
for ( l1 = 0; l1 < L1_PAGETABLE_ENTRIES; ++l1 )
{
if ( l1e_get_intpte(l1tab[l1]) )
free_domheap_page(l1e_get_page(l1tab[l1]));
}
free_domheap_page(l2e_get_page(l2tab[l2]));
}
}
free_domheap_page(l3e_get_page(d->arch.mm_arg_xlat_l3[l3]));
}
}
free_domheap_page(virt_to_page(d->arch.mm_arg_xlat_l3));
}
}
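
/*
 * Give a 32-on-64 vCPU its own L4 page table: start from a copy of
 * idle_pg_table, empty slot 0, install the linear and per-domain mappings,
 * and attach the argument translation area.  On success the vCPU's kernel
 * and user page tables both point at the new L4.
 */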
static int setup_compat_l4(struct vcpu *v)
{
struct page_info *pg = alloc_domheap_page(NULL);
l4_pgentry_t *l4tab;
int rc;
if ( pg == NULL )
return -ENOMEM;
/* This page needs to look like a pagetable so that it can be shadowed */
pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;
l4tab = copy_page(page_to_virt(pg), idle_pg_table);
l4tab[0] = l4e_empty();
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_page(pg, __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
__PAGE_HYPERVISOR);
if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
{
free_domheap_page(pg);
return rc;
}
v->arch.guest_table = pagetable_from_page(pg);
v->arch.guest_table_user = v->arch.guest_table;
return 0;
}
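
/* Free the compat-mode L4 set up by setup_compat_l4(). */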
static void release_compat_l4(struct vcpu *v)
{
free_domheap_page(pagetable_get_page(v->arch.guest_table));
v->arch.guest_table = pagetable_null();
v->arch.guest_table_user = pagetable_null();
}
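
/*
 * A domain may switch between native and compat mode only while it is a PV
 * domain that owns no memory yet.
 */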
static inline int may_switch_mode(struct domain *d)
{
return (!is_hvm_domain(d) && (d->tot_pages == 0));
}
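
/*
 * Convert a compat (32-on-64) PV domain back to native 64-bit mode: release
 * the argument translation area, repoint every vCPU's GDT mapping at the
 * native GDT, and drop the per-vCPU compat L4 tables.
 */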
int switch_native(struct domain *d)
{
l1_pgentry_t gdt_l1e;
unsigned int vcpuid;
if ( d == NULL )
return -EINVAL;
if ( !may_switch_mode(d) )
return -EACCES;
if ( !is_pv_32on64_domain(d) )
return 0;
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
release_arg_xlat_area(d);
/* switch gdt */
gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
        if ( d->vcpu[vcpuid] != NULL )
            release_compat_l4(d->vcpu[vcpuid]);
}
return 0;
}
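
/*
 * Convert a native PV domain to compat (32-on-64) mode: build a compat L4
 * for every existing vCPU, repoint the GDT mappings at the compat GDT, and
 * record the widest machine address the guest's compat M2P window can cover.
 */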
int switch_compat(struct domain *d)
{
l1_pgentry_t gdt_l1e;
unsigned int vcpuid;
if ( d == NULL )
return -EINVAL;
if ( !may_switch_mode(d) )
return -EACCES;
if ( is_pv_32on64_domain(d) )
return 0;
d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 1;
/* switch gdt */
gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
if ( (d->vcpu[vcpuid] != NULL) &&
(setup_compat_l4(d->vcpu[vcpuid]) != 0) )
goto undo_and_fail;
d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
}
    /*
     * The compat M2P table must fit between HYPERVISOR_COMPAT_VIRT_START(d)
     * and 4GB, at four bytes per entry, which bounds the machine address
     * width visible to the guest: log2(window size) - 2 bits of frame number
     * plus PAGE_SHIFT bits of page offset.
     */
    d->arch.physaddr_bitsize =
        fls((1UL << 32) - HYPERVISOR_COMPAT_VIRT_START(d)) - 1
        + (PAGE_SHIFT - 2);