/******************************************************************************
* arch/x86/domain.c
*
* x86-specific domain handling (e.g., register setup and context switching).
 */

/*
* Copyright (C) 1995 Linus Torvalds
*
* Pentium III FXSR, SSE support
* Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
#include <xen/iocap.h>
#include <xen/kernel.h>
#include <xen/multicall.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/console.h>
#include <xen/percpu.h>
#include <xen/compat.h>
#include <xen/acpi.h>
#include <xen/pci.h>
#include <xen/paging.h>
#include <xen/cpu.h>
#include <xen/wait.h>
#include <public/sysctl.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
#include <asm/hypercall.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/nmi.h>
#include <asm/mce.h>
#include <xen/numa.h>
#include <xen/iommu.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
#endif
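
/*
 * Per-CPU state: curr_vcpu tracks the vCPU whose register state was last
 * loaded on this CPU (with lazy switching it can differ from 'current'
 * while the idle vCPU runs); cr4 shadows this CPU's CR4 value.
 */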
DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
DEFINE_PER_CPU(unsigned long, cr4);
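
/*
 * Idle strategy hooks: pm_idle is invoked on each pass through the idle
 * loop, dead_idle once a CPU has been offlined.  They default to the
 * HLT-based handlers below and may be re-pointed elsewhere (e.g. by the
 * cpuidle code).
 */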
static void default_idle(void);
static void default_dead_idle(void);
void (*pm_idle) (void) __read_mostly = default_idle;
void (*dead_idle) (void) __read_mostly = default_dead_idle;
static void paravirt_ctxt_switch_from(struct vcpu *v);
static void paravirt_ctxt_switch_to(struct vcpu *v);
static void vcpu_destroy_pagetables(struct vcpu *v);
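
/* Schedule tail for idle vCPUs: restart the idle loop on a fresh stack. */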
static void continue_idle_domain(struct vcpu *v)
{
reset_stack_and_jump(idle_loop);
}
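
/*
 * Schedule tail for non-idle vCPUs: complete any pending wait-queue
 * wakeup, then unwind to guest context via ret_from_intr.
 */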
static void continue_nonidle_domain(struct vcpu *v)
{
check_wakeup_from_wait();
reset_stack_and_jump(ret_from_intr);
}
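
/*
 * Default idle handler: halt with interrupts enabled if this CPU has no
 * pending work, otherwise return immediately with interrupts re-enabled.
 */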
static void default_idle(void)
{
local_irq_disable();
if ( cpu_is_haltable(smp_processor_id()) )
safe_halt();
else
local_irq_enable();
}
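
/* Default handler for an offlined CPU: halt forever. */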
static void default_dead_idle(void)
{
for ( ; ; )
halt();
}
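
/*
 * Tear down this CPU once it has been offlined: clear its boot state,
 * flush caches, and enter the dead-idle handler with interrupts disabled.
 */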
static void play_dead(void)
{
cpu_exit_clear(smp_processor_id());
mb();
local_irq_disable();
wbinvd();
(*dead_idle)();
}
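
/*
 * Main loop of every idle vCPU: park the CPU if it has been offlined,
 * otherwise invoke the configured idle handler and then service tasklets
 * and softirqs.
 */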
void idle_loop(void)
{
for ( ; ; )
{
if ( cpu_is_offline(smp_processor_id()) )
play_dead();
(*pm_idle)();
do_tasklet();
do_softirq();
}
}
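
/*
 * Entry point for a CPU's idle vCPU: mark this CPU in the idle domain's
 * and vCPU's dirty cpumasks, then jump into idle_loop() on a fresh stack.
 */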
void startup_cpu_idle_loop(void)
{
struct vcpu *v = current;
ASSERT(is_idle_vcpu(v));
cpu_set(smp_processor_id(), v->domain->domain_dirty_cpumask);
cpu_set(smp_processor_id(), v->vcpu_dirty_cpumask);
reset_stack_and_jump(idle_loop);
}
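
/* Debug dump of the pages owned by a domain (truncated for large domains). */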
void dump_pageframe_info(struct domain *d)
{
struct page_info *page;
printk("Memory pages belonging to domain %u:\n", d->domain_id);
if ( d->tot_pages >= 10 )
{
printk(" DomPage list too long to display\n");
}
else
{
spin_lock(&d->page_alloc_lock);
page_list_for_each ( page, &d->page_list )
{
printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
spin_unlock(&d->page_alloc_lock);
}
if ( is_hvm_domain(d) )
{
p2m_pod_dump_data(p2m_get_hostp2m(d));
}
spin_lock(&d->page_alloc_lock);
page_list_for_each ( page, &d->xenpage_list )
{
printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
spin_unlock(&d->page_alloc_lock);
}
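
/*
 * Allocate and zero the domain structure from the Xen heap; the
 * MEMF_bits() placement restriction is explained by the comment inside.
 */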
struct domain *alloc_domain_struct(void)
{
struct domain *d;
/*
* We pack the PDX of the domain structure into a 32-bit field within
* the page_info structure. Hence the MEMF_bits() restriction.
*/
unsigned int bits = 32 + PAGE_SHIFT;
#ifdef __x86_64__
bits += pfn_pdx_hole_shift;
#endif
d = alloc_xenheap_pages(get_order_from_bytes(sizeof(*d)), MEMF_bits(bits));
if ( d != NULL )
memset(d, 0, sizeof(*d));
return d;
}
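
/* Free a domain structure previously obtained from alloc_domain_struct(). */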
void free_domain_struct(struct domain *d)
{
lock_profile_deregister_struct(LOCKPROF_TYPE_PERDOM, d);
free_xenheap_pages(d, get_order_from_bytes(sizeof(*d)));
}
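
/*
 * Allocate and zero the vcpu structure; it must live below 4GB for the
 * reason given in the comment inside.
 */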
struct vcpu *alloc_vcpu_struct(void)
{
struct vcpu *v;
/*
* This structure contains embedded PAE PDPTEs, used when an HVM guest
* runs on shadow pagetables outside of 64-bit mode. In this case the CPU
* may require that the shadow CR3 points below 4GB, and hence the whole
* structure must satisfy this restriction. Thus we specify MEMF_bits(32).
*/
v = alloc_xenheap_pages(get_order_from_bytes(sizeof(*v)), MEMF_bits(32));
if ( v != NULL )
memset(v, 0, sizeof(*v));
return v;
}
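
/* Free a vcpu structure previously obtained from alloc_vcpu_struct(). */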
void free_vcpu_struct(struct vcpu *v)
{
free_xenheap_pages(v, get_order_from_bytes(sizeof(*v)));
}

#ifdef __x86_64__
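
/*
 * Build the per-vCPU L4 pagetable used when running a compat (32-on-64)
 * guest: clone the idle L4, drop the low slot, and install the linear and
 * per-domain mappings.  The page is typed as a validated L4 so the shadow
 * code can handle it.
 */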
static int setup_compat_l4(struct vcpu *v)
{
struct page_info *pg;
l4_pgentry_t *l4tab;
int rc;
pg = alloc_domheap_page(NULL, MEMF_node(vcpu_to_node(v)));
if ( pg == NULL )
return -ENOMEM;
rc = setup_compat_arg_xlat(v);
if ( rc )
{
free_domheap_page(pg);
return rc;
}
/* This page needs to look like a pagetable so that it can be shadowed */
pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
l4tab = page_to_virt(pg);
copy_page(l4tab, idle_pg_table);
l4tab[0] = l4e_empty();
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_page(pg, __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
__PAGE_HYPERVISOR);
v->arch.guest_table = pagetable_from_page(pg);
v->arch.guest_table_user = v->arch.guest_table;
return 0;
}
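
/* Undo setup_compat_l4(): free the xlat area and the compat L4 page. */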
static void release_compat_l4(struct vcpu *v)
{
free_compat_arg_xlat(v);
free_domheap_page(pagetable_get_page(v->arch.guest_table));
v->arch.guest_table = pagetable_null();