Diffstat (limited to 'xen/arch/x86')
-rw-r--r--  xen/arch/x86/Makefile               |   1
-rw-r--r--  xen/arch/x86/acpi/boot.c            |  62
-rw-r--r--  xen/arch/x86/cpu/amd.c              |  18
-rw-r--r--  xen/arch/x86/domain.c               |  25
-rw-r--r--  xen/arch/x86/domain_build.c         |   4
-rw-r--r--  xen/arch/x86/hvm/emulate.c          |  81
-rw-r--r--  xen/arch/x86/hvm/hpet.c             |  21
-rw-r--r--  xen/arch/x86/hvm/hvm.c              | 192
-rw-r--r--  xen/arch/x86/hvm/i8254.c            |  68
-rw-r--r--  xen/arch/x86/hvm/intercept.c        | 161
-rw-r--r--  xen/arch/x86/hvm/io.c               |  84
-rw-r--r--  xen/arch/x86/hvm/mtrr.c             |   2
-rw-r--r--  xen/arch/x86/hvm/pmtimer.c          |   8
-rw-r--r--  xen/arch/x86/hvm/rtc.c              |   8
-rw-r--r--  xen/arch/x86/hvm/stdvga.c           |  26
-rw-r--r--  xen/arch/x86/hvm/svm/svm.c          |   7
-rw-r--r--  xen/arch/x86/hvm/vioapic.c          |  54
-rw-r--r--  xen/arch/x86/hvm/vlapic.c           | 303
-rw-r--r--  xen/arch/x86/hvm/vmx/realmode.c     |   2
-rw-r--r--  xen/arch/x86/hvm/vmx/vmcs.c         |  85
-rw-r--r--  xen/arch/x86/hvm/vmx/vmx.c          | 295
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_32/exits.S |   2
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_64/exits.S |   2
-rw-r--r--  xen/arch/x86/hvm/vpic.c             |  40
-rw-r--r--  xen/arch/x86/io_apic.c              |   9
-rw-r--r--  xen/arch/x86/mm.c                   |  51
-rw-r--r--  xen/arch/x86/mm/hap/Makefile        |   1
-rw-r--r--  xen/arch/x86/mm/hap/hap.c           |  12
-rw-r--r--  xen/arch/x86/mm/hap/p2m-ept.c       | 257
-rw-r--r--  xen/arch/x86/mm/p2m.c               | 357
-rw-r--r--  xen/arch/x86/mm/paging.c            |  15
-rw-r--r--  xen/arch/x86/mm/shadow/common.c     |   8
-rw-r--r--  xen/arch/x86/pci.c                  | 118
-rw-r--r--  xen/arch/x86/setup.c                |   2
-rw-r--r--  xen/arch/x86/time.c                 | 142
-rw-r--r--  xen/arch/x86/traps.c                | 295
-rw-r--r--  xen/arch/x86/x86_64/compat/mm.c     |   4
-rw-r--r--  xen/arch/x86/x86_64/mm.c            |   8
38 files changed, 1814 insertions(+), 1016 deletions(-)
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 086a7b530f..334a996eb6 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -31,6 +31,7 @@ obj-y += mm.o
obj-y += mpparse.o
obj-y += nmi.o
obj-y += numa.o
+obj-y += pci.o
obj-y += physdev.o
obj-y += rwlock.o
obj-y += setup.o
diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index cfe87671e9..9a17d61e3b 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -374,6 +374,18 @@ extern u32 pmtmr_ioport;
#endif
#ifdef CONFIG_ACPI_SLEEP
+#define acpi_fadt_copy_address(dst, src, len) do { \
+ if (fadt->header.revision >= FADT2_REVISION_ID) \
+ acpi_sinfo.dst##_blk = fadt->x##src##_block; \
+ if (!acpi_sinfo.dst##_blk.address) { \
+ acpi_sinfo.dst##_blk.address = fadt->src##_block; \
+ acpi_sinfo.dst##_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO; \
+ acpi_sinfo.dst##_blk.bit_width = fadt->len##_length << 3; \
+ acpi_sinfo.dst##_blk.bit_offset = 0; \
+ acpi_sinfo.dst##_blk.access_width = 0; \
+ } \
+} while (0)
+
/* Get pm1x_cnt and pm1x_evt information for ACPI sleep */
static void __init
acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
@@ -388,37 +400,18 @@ acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
goto bad;
rsdp = __va(rsdp_phys);
- if (fadt->header.revision >= FADT2_REVISION_ID) {
- memcpy(&acpi_sinfo.pm1a_cnt_blk, &fadt->xpm1a_control_block,
- sizeof(struct acpi_generic_address));
- memcpy(&acpi_sinfo.pm1b_cnt_blk, &fadt->xpm1b_control_block,
- sizeof(struct acpi_generic_address));
- memcpy(&acpi_sinfo.pm1a_evt_blk, &fadt->xpm1a_event_block,
- sizeof(struct acpi_generic_address));
- memcpy(&acpi_sinfo.pm1b_evt_blk, &fadt->xpm1b_event_block,
- sizeof(struct acpi_generic_address));
- } else {
- acpi_sinfo.pm1a_cnt_blk.address = fadt->pm1a_control_block;
- acpi_sinfo.pm1b_cnt_blk.address = fadt->pm1b_control_block;
- acpi_sinfo.pm1a_evt_blk.address = fadt->pm1a_event_block;
- acpi_sinfo.pm1b_evt_blk.address = fadt->pm1b_event_block;
- acpi_sinfo.pm1a_cnt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1b_cnt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1a_evt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1b_evt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1a_cnt_blk.bit_width = 16;
- acpi_sinfo.pm1b_cnt_blk.bit_width = 16;
- acpi_sinfo.pm1a_evt_blk.bit_width = 16;
- acpi_sinfo.pm1b_evt_blk.bit_width = 16;
- acpi_sinfo.pm1a_cnt_blk.bit_offset = 0;
- acpi_sinfo.pm1b_cnt_blk.bit_offset = 0;
- acpi_sinfo.pm1a_evt_blk.bit_offset = 0;
- acpi_sinfo.pm1b_evt_blk.bit_offset = 0;
- acpi_sinfo.pm1a_cnt_blk.access_width = 0;
- acpi_sinfo.pm1b_cnt_blk.access_width = 0;
- acpi_sinfo.pm1a_evt_blk.access_width = 0;
- acpi_sinfo.pm1b_evt_blk.access_width = 0;
- }
+ acpi_fadt_copy_address(pm1a_cnt, pm1a_control, pm1_control);
+ acpi_fadt_copy_address(pm1b_cnt, pm1b_control, pm1_control);
+ acpi_fadt_copy_address(pm1a_evt, pm1a_event, pm1_event);
+ acpi_fadt_copy_address(pm1b_evt, pm1b_event, pm1_event);
+
+ printk(KERN_INFO PREFIX
+ "ACPI SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], "
+ "pm1x_evt[%"PRIx64",%"PRIx64"]\n",
+ acpi_sinfo.pm1a_cnt_blk.address,
+ acpi_sinfo.pm1b_cnt_blk.address,
+ acpi_sinfo.pm1a_evt_blk.address,
+ acpi_sinfo.pm1b_evt_blk.address);
/* Now FACS... */
if (fadt->header.revision >= FADT2_REVISION_ID)
@@ -461,13 +454,6 @@ acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
}
printk(KERN_INFO PREFIX
- "ACPI SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], "
- "pm1x_evt[%"PRIx64",%"PRIx64"]\n",
- acpi_sinfo.pm1a_cnt_blk.address,
- acpi_sinfo.pm1b_cnt_blk.address,
- acpi_sinfo.pm1a_evt_blk.address,
- acpi_sinfo.pm1b_evt_blk.address);
- printk(KERN_INFO PREFIX
" wakeup_vec[%"PRIx64"], vec_size[%x]\n",
acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width);
return;
diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 909a73f3fa..f0253152bc 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -3,6 +3,7 @@
#include <xen/bitops.h>
#include <xen/mm.h>
#include <xen/smp.h>
+#include <xen/pci.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -66,19 +67,6 @@ static int c1_ramping_may_cause_clock_drift(struct cpuinfo_x86 *c)
return 1;
}
-/* PCI access functions. Should be safe to use 0xcf8/0xcfc port accesses here. */
-static u8 pci_read_byte(u32 bus, u32 dev, u32 fn, u32 reg)
-{
- outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8);
- return inb(0xcfc + (reg & 3));
-}
-
-static void pci_write_byte(u32 bus, u32 dev, u32 fn, u32 reg, u8 val)
-{
- outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8);
- outb(val, 0xcfc + (reg & 3));
-}
-
/*
* Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation
* cores only. Assume BIOS has setup all Northbridges equivalently.
@@ -90,12 +78,12 @@ static void disable_c1_ramping(void)
for (node=0; node < NR_CPUS; node++) {
/* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
- pmm7 = pci_read_byte(0, 0x18+node, 0x3, 0x87);
+ pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87);
/* Invalid read means we've updated every Northbridge. */
if (pmm7 == 0xFF)
break;
pmm7 &= 0xFC; /* clear pmm7[1:0] */
- pci_write_byte(0, 0x18+node, 0x3, 0x87, pmm7);
+ pci_conf_write8(0, 0x18+node, 0x3, 0x87, pmm7);
printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node);
}
}
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index c56db37b37..4418c51ff9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -46,6 +46,7 @@
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/nmi.h>
+#include <xen/numa.h>
#include <xen/iommu.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
@@ -171,7 +172,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
if ( !d->arch.mm_arg_xlat_l3 )
{
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
@@ -189,7 +190,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
{
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
clear_page(page_to_virt(pg));
@@ -198,7 +199,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
{
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
clear_page(page_to_virt(pg));
@@ -206,7 +207,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
}
l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
@@ -252,7 +253,7 @@ static void release_arg_xlat_area(struct domain *d)
static int setup_compat_l4(struct vcpu *v)
{
- struct page_info *pg = alloc_domheap_page(NULL);
+ struct page_info *pg = alloc_domheap_page(NULL, 0);
l4_pgentry_t *l4tab;
int rc;
@@ -477,7 +478,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
#else /* __x86_64__ */
- if ( (pg = alloc_domheap_page(NULL)) == NULL )
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( pg == NULL )
goto fail;
d->arch.mm_perdomain_l2 = page_to_virt(pg);
clear_page(d->arch.mm_perdomain_l2);
@@ -486,7 +488,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
__PAGE_HYPERVISOR);
- if ( (pg = alloc_domheap_page(NULL)) == NULL )
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( pg == NULL )
goto fail;
d->arch.mm_perdomain_l3 = page_to_virt(pg);
clear_page(d->arch.mm_perdomain_l3);
@@ -500,13 +503,15 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START;
#endif
- paging_domain_init(d);
+ if ( (rc = paging_domain_init(d)) != 0 )
+ goto fail;
paging_initialised = 1;
if ( !is_idle_domain(d) )
{
d->arch.ioport_caps =
rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ rc = -ENOMEM;
if ( d->arch.ioport_caps == NULL )
goto fail;
@@ -946,9 +951,9 @@ arch_do_vcpu_op(
if ( copy_from_guest(&info, arg, 1) )
break;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
rc = map_vcpu_info(v, info.mfn, info.offset);
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
break;
}
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index dc8ee52f07..56106bae2f 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -630,7 +630,7 @@ int __init construct_dom0(
}
else
{
- page = alloc_domheap_page(NULL);
+ page = alloc_domheap_page(NULL, 0);
if ( !page )
panic("Not enough RAM for domain 0 PML4.\n");
l4start = l4tab = page_to_virt(page);
@@ -957,6 +957,8 @@ int __init construct_dom0(
rc |= ioports_deny_access(dom0, 0x40, 0x43);
/* PIT Channel 2 / PC Speaker Control. */
rc |= ioports_deny_access(dom0, 0x61, 0x61);
+ /* PCI configuration spaces. */
+ rc |= ioports_deny_access(dom0, 0xcf8, 0xcff);
/* Command-line I/O ranges. */
process_dom0_ioports_disable();
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 57065f7625..d7bf9f3f2f 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -20,12 +20,13 @@
#include <asm/hvm/support.h>
static int hvmemul_do_io(
- int is_mmio, paddr_t addr, unsigned long count, int size,
+ int is_mmio, paddr_t addr, unsigned long *reps, int size,
paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
{
struct vcpu *curr = current;
vcpu_iodata_t *vio = get_ioreq(curr);
ioreq_t *p = &vio->vp_ioreq;
+ int rc;
switch ( curr->arch.hvm_vcpu.io_state )
{
@@ -41,52 +42,72 @@ static int hvmemul_do_io(
return X86EMUL_UNHANDLEABLE;
}
- curr->arch.hvm_vcpu.io_state =
- (val == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
-
if ( p->state != STATE_IOREQ_NONE )
+ {
gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
p->state);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ curr->arch.hvm_vcpu.io_state =
+ (val == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
p->dir = dir;
p->data_is_ptr = value_is_ptr;
p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
p->size = size;
p->addr = addr;
- p->count = count;
+ p->count = *reps;
p->df = df;
p->data = value;
p->io_count++;
- if ( is_mmio
- ? (hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p))
- : hvm_portio_intercept(p) )
+ if ( is_mmio )
{
+ rc = hvm_mmio_intercept(p);
+ if ( rc == X86EMUL_UNHANDLEABLE )
+ rc = hvm_buffered_io_intercept(p);
+ }
+ else
+ {
+ rc = hvm_portio_intercept(p);
+ }
+
+ switch ( rc )
+ {
+ case X86EMUL_OKAY:
+ case X86EMUL_RETRY:
+ *reps = p->count;
p->state = STATE_IORESP_READY;
hvm_io_assist();
if ( val != NULL )
*val = curr->arch.hvm_vcpu.io_data;
curr->arch.hvm_vcpu.io_state = HVMIO_none;
- return X86EMUL_OKAY;
+ break;
+ case X86EMUL_UNHANDLEABLE:
+ hvm_send_assist_req(curr);
+ rc = (val != NULL) ? X86EMUL_RETRY : X86EMUL_OKAY;
+ break;
+ default:
+ BUG();
}
- hvm_send_assist_req(curr);
- return (val != NULL) ? X86EMUL_RETRY : X86EMUL_OKAY;
+ return rc;
}
static int hvmemul_do_pio(
- unsigned long port, unsigned long count, int size,
+ unsigned long port, unsigned long *reps, int size,
paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
{
- return hvmemul_do_io(0, port, count, size, value,
+ return hvmemul_do_io(0, port, reps, size, value,
dir, df, value_is_ptr, val);
}
static int hvmemul_do_mmio(
- paddr_t gpa, unsigned long count, int size,
+ paddr_t gpa, unsigned long *reps, int size,
paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
{
- return hvmemul_do_io(1, gpa, count, size, value,
+ return hvmemul_do_io(1, gpa, reps, size, value,
dir, df, value_is_ptr, val);
}
@@ -206,7 +227,7 @@ static int __hvmemul_read(
struct hvm_emulate_ctxt *hvmemul_ctxt)
{
struct vcpu *curr = current;
- unsigned long addr;
+ unsigned long addr, reps = 1;
uint32_t pfec = PFEC_page_present;
paddr_t gpa;
int rc;
@@ -226,7 +247,8 @@ static int __hvmemul_read(
return X86EMUL_UNHANDLEABLE;
gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
if ( (off + bytes) <= PAGE_SIZE )
- return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ return hvmemul_do_mmio(gpa, &reps, bytes, 0,
+ IOREQ_READ, 0, 0, val);
}
if ( (seg != x86_seg_none) &&
@@ -251,7 +273,7 @@ static int __hvmemul_read(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, 0, val);
}
return X86EMUL_OKAY;
@@ -302,7 +324,7 @@ static int hvmemul_write(
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
struct vcpu *curr = current;
- unsigned long addr;
+ unsigned long addr, reps = 1;
uint32_t pfec = PFEC_page_present | PFEC_write_access;
paddr_t gpa;
int rc;
@@ -318,8 +340,8 @@ static int hvmemul_write(
unsigned int off = addr & (PAGE_SIZE - 1);
gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
if ( (off + bytes) <= PAGE_SIZE )
- return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE,
- 0, 0, NULL);
+ return hvmemul_do_mmio(gpa, &reps, bytes, val,
+ IOREQ_WRITE, 0, 0, NULL);
}
if ( (seg != x86_seg_none) &&
@@ -339,7 +361,8 @@ static int hvmemul_write(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL);
+ return hvmemul_do_mmio(gpa, &reps, bytes, val,
+ IOREQ_WRITE, 0, 0, NULL);
}
return X86EMUL_OKAY;
@@ -386,7 +409,7 @@ static int hvmemul_rep_ins(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_pio(src_port, *reps, bytes_per_rep, gpa, IOREQ_READ,
+ return hvmemul_do_pio(src_port, reps, bytes_per_rep, gpa, IOREQ_READ,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
@@ -419,7 +442,7 @@ static int hvmemul_rep_outs(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_pio(dst_port, *reps, bytes_per_rep, gpa, IOREQ_WRITE,
+ return hvmemul_do_pio(dst_port, reps, bytes_per_rep, gpa, IOREQ_WRITE,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
@@ -469,14 +492,14 @@ static int hvmemul_rep_movs(
(void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
if ( !p2m_is_ram(p2mt) )
return hvmemul_do_mmio(
- sgpa, *reps, bytes_per_rep, dgpa, IOREQ_READ,
+ sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
(void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
if ( p2m_is_ram(p2mt) )
return X86EMUL_UNHANDLEABLE;
return hvmemul_do_mmio(
- dgpa, *reps, bytes_per_rep, sgpa, IOREQ_WRITE,
+ dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
@@ -513,7 +536,8 @@ static int hvmemul_read_io(
unsigned long *val,
struct x86_emulate_ctxt *ctxt)
{
- return hvmemul_do_pio(port, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ unsigned long reps = 1;
+ return hvmemul_do_pio(port, &reps, bytes, 0, IOREQ_READ, 0, 0, val);
}
static int hvmemul_write_io(
@@ -522,7 +546,8 @@ static int hvmemul_write_io(
unsigned long val,
struct x86_emulate_ctxt *ctxt)
{
- return hvmemul_do_pio(port, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL);
+ unsigned long reps = 1;
+ return hvmemul_do_pio(port, &reps, bytes, val, IOREQ_WRITE, 0, 0, NULL);
}
static int hvmemul_read_cr(
diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c
index 49ca998d37..03dfbf3bd8 100644
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -150,8 +150,9 @@ static inline uint64_t hpet_read_maincounter(HPETState *h)
return h->hpet.mc64;
}
-static unsigned long hpet_read(
- struct vcpu *v, unsigned long addr, unsigned long length)
+static int hpet_read(
+ struct vcpu *v, unsigned long addr, unsigned long length,
+ unsigned long *pval)
{
HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
unsigned long result;
@@ -160,7 +161,10 @@ static unsigned long hpet_read(
addr &= HPET_MMAP_SIZE-1;
if ( hpet_check_access_length(addr, length) != 0 )
- return ~0UL;
+ {
+ result = ~0ul;
+ goto out;
+ }
spin_lock(&h->lock);
@@ -174,7 +178,9 @@ static unsigned long hpet_read(
spin_unlock(&h->lock);
- return result;
+ out:
+ *pval = result;
+ return X86EMUL_OKAY;
}
static void hpet_stop_timer(HPETState *h, unsigned int tn)
@@ -234,7 +240,7 @@ static inline uint64_t hpet_fixup_reg(
return new;
}
-static void hpet_write(
+static int hpet_write(
struct vcpu *v, unsigned long addr,
unsigned long length, unsigned long val)
{
@@ -245,7 +251,7 @@ static void hpet_write(
addr &= HPET_MMAP_SIZE-1;
if ( hpet_check_access_length(addr, length) != 0 )
- return;
+ goto out;
spin_lock(&h->lock);
@@ -349,6 +355,9 @@ static void hpet_write(
}
spin_unlock(&h->lock);
+
+ out:
+ return X86EMUL_OKAY;
}
static int hpet_range(struct vcpu *v, unsigned long addr)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 961bfbf354..97a1aaa17c 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -181,7 +181,8 @@ void hvm_do_resume(struct vcpu *v)
break;
default:
gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
- domain_crash_synchronous();
+ domain_crash(v->domain);
+ return; /* bail */
}
}
}
@@ -276,7 +277,7 @@ static int hvm_print_line(
}
spin_unlock(&hd->pbuf_lock);
- return 1;
+ return X86EMUL_OKAY;
}
int hvm_domain_initialise(struct domain *d)
@@ -478,11 +479,11 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
vc = &v->arch.guest_context;
/* Need to init this vcpu before loading its contents */
- LOCK_BIGLOCK(d);
+ domain_lock(d);
if ( !v->is_initialised )
if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
return rc;
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
return -EINVAL;
@@ -687,47 +688,26 @@ void hvm_vcpu_destroy(struct vcpu *v)
/*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
}
-
-void hvm_vcpu_reset(struct vcpu *v)
-{
- vcpu_pause(v);
-
- vlapic_reset(vcpu_vlapic(v));
-
- hvm_funcs.vcpu_initialise(v);
-
- set_bit(_VPF_down, &v->pause_flags);
- clear_bit(_VPF_blocked, &v->pause_flags);
- v->fpu_initialised = 0;
- v->fpu_dirtied = 0;
- v->is_initialised = 0;
-
- vcpu_unpause(v);
-}
-
-static void hvm_vcpu_down(void)
+void hvm_vcpu_down(struct vcpu *v)
{
- struct vcpu *v = current;
struct domain *d = v->domain;
int online_count = 0;
- gdprintk(XENLOG_INFO, "VCPU%d: going offline.\n", v->vcpu_id);
-
/* Doesn't halt us immediately, but we'll never return to guest context. */
set_bit(_VPF_down, &v->pause_flags);
vcpu_sleep_nosync(v);
/* Any other VCPUs online? ... */
- LOCK_BIGLOCK(d);
+ domain_lock(d);
for_each_vcpu ( d, v )
if ( !test_bit(_VPF_down, &v->pause_flags) )
online_count++;
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
/* ... Shut down the domain if not. */
if ( online_count == 0 )
{
- gdprintk(XENLOG_INFO, "all CPUs offline -- powering off.\n");
+ gdprintk(XENLOG_INFO, "All CPUs offline -- powering off.\n");
domain_shutdown(d, SHUTDOWN_poweroff);
}
}
@@ -742,9 +722,10 @@ void hvm_send_assist_req(struct vcpu *v)
p = &get_ioreq(v)->vp_ioreq;
if ( unlikely(p->state != STATE_IOREQ_NONE) )
{
- /* This indicates a bug in the device model. Crash the domain. */
+ /* This indicates a bug in the device model. Crash the domain. */
gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
- domain_crash_synchronous();
+ domain_crash(v->domain);
+ return;
}
prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
@@ -765,7 +746,7 @@ void hvm_hlt(unsigned long rflags)
* out of this.
*/
if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
- return hvm_vcpu_down();
+ return hvm_vcpu_down(current);
do_sched_op_compat(SCHEDOP_block, 0);
}
@@ -1894,79 +1875,6 @@ void hvm_hypercall_page_initialise(struct domain *d,
hvm_funcs.init_hypercall_page(d, hypercall_page);
}
-int hvm_bringup_ap(int vcpuid, int trampoline_vector)
-{
- struct domain *d = current->domain;
- struct vcpu *v;
- struct vcpu_guest_context *ctxt;
- struct segment_register reg;
-
- ASSERT(is_hvm_domain(d));
-
- if ( (v = d->vcpu[vcpuid]) == NULL )
- return -ENOENT;
-
- v->fpu_initialised = 0;
- v->arch.flags |= TF_kernel_mode;
- v->is_initialised = 1;
-
- ctxt = &v->arch.guest_context;
- memset(ctxt, 0, sizeof(*ctxt));
- ctxt->flags = VGCF_online;
- ctxt->user_regs.eflags = 2;
-
- v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
- hvm_update_guest_cr(v, 0);
-
- v->arch.hvm_vcpu.guest_cr[2] = 0;
- hvm_update_guest_cr(v, 2);
-
- v->arch.hvm_vcpu.guest_cr[3] = 0;
- hvm_update_guest_cr(v, 3);
-
- v->arch.hvm_vcpu.guest_cr[4] = 0;
- hvm_update_guest_cr(v, 4);
-
- v->arch.hvm_vcpu.guest_efer = 0;
- hvm_update_guest_efer(v);
-
- reg.sel = trampoline_vector << 8;
- reg.base = (uint32_t)reg.sel << 4;
- reg.limit = 0xffff;
- reg.attr.bytes = 0x89b;
- hvm_set_segment_register(v, x86_seg_cs, &reg);
-
- reg.sel = reg.base = 0;
- reg.limit = 0xffff;
- reg.attr.bytes = 0x893;
- hvm_set_segment_register(v, x86_seg_ds, &reg);
- hvm_set_segment_register(v, x86_seg_es, &reg);
- hvm_set_segment_register(v, x86_seg_fs, &reg);
- hvm_set_segment_register(v, x86_seg_gs, &reg);
- hvm_set_segment_register(v, x86_seg_ss, &reg);
-
- reg.attr.bytes = 0x82; /* LDT */
- hvm_set_segment_register(v, x86_seg_ldtr, &reg);
-
- reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
- hvm_set_segment_register(v, x86_seg_tr, &reg);
-
- reg.attr.bytes = 0;
- hvm_set_segment_register(v, x86_seg_gdtr, &reg);
- hvm_set_segment_register(v, x86_seg_idtr, &reg);
-
- /* Sync AP's TSC with BSP's. */
- v->arch.hvm_vcpu.cache_tsc_offset =
- v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
-
- if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
- vcpu_wake(v);
-
- gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid);
- return 0;
-}
-
static int hvmop_set_pci_intx_level(
XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
{
@@ -2185,13 +2093,16 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
if ( op == HVMOP_set_param )
{
+ rc = 0;
+
switch ( a.index )
{
case HVM_PARAM_IOREQ_PFN:
iorp = &d->arch.hvm_domain.ioreq;
- rc = hvm_set_ioreq_page(d, iorp, a.value);
+ if ( (rc = hvm_set_ioreq_page(d, iorp, a.value)) != 0 )
+ break;
spin_lock(&iorp->lock);
- if ( (rc == 0) && (iorp->va != NULL) )
+ if ( iorp->va != NULL )
/* Initialise evtchn port info if VCPUs already created. */
for_each_vcpu ( d, v )
get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
@@ -2206,13 +2117,72 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
hvm_latch_shinfo_size(d);
break;
case HVM_PARAM_TIMER_MODE:
- rc = -EINVAL;
if ( a.value > HVMPTM_one_missed_tick_pending )
- goto param_fail;
+ rc = -EINVAL;
+ break;
+ case HVM_PARAM_IDENT_PT:
+ rc = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+
+ rc = -EINVAL;
+ if ( d->arch.hvm_domain.params[a.index] != 0 )
+ break;
+
+ rc = 0;
+ if ( !paging_mode_hap(d) )
+ break;
+
+ domain_pause(d);
+
+ /*
+ * Update GUEST_CR3 in each VMCS to point at identity map.
+ * All foreign updates to guest state must synchronise on
+ * the domctl_lock.
+ */
+ spin_lock(&domctl_lock);
+ d->arch.hvm_domain.params[a.index] = a.value;
+ for_each_vcpu ( d, v )
+ paging_update_cr3(v);
+ spin_unlock(&domctl_lock);
+
+ domain_unpause(d);
+ break;
+ case HVM_PARAM_DM_DOMAIN:
+ /* Privileged domains only, as we must domain_pause(d). */
+ rc = -EPERM;
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ break;
+
+ if ( a.value == DOMID_SELF )
+ a.value = current->domain->domain_id;
+
+ rc = 0;
+ domain_pause(d); /* safe to change per-vcpu xen_port */
+ iorp = &d->arch.hvm_domain.ioreq;
+ for_each_vcpu ( d, v )
+ {
+ int old_port, new_port;
+ new_port = alloc_unbound_xen_event_channel(v, a.value);
+ if ( new_port < 0 )
+ {
+ rc = new_port;
+ break;
+ }
+ /* xchg() ensures that only we free_xen_event_channel() */
+ old_port = xchg(&v->arch.hvm_vcpu.xen_port, new_port);
+ free_xen_event_channel(v, old_port);
+ spin_lock(&iorp->lock);
+ if ( iorp->va != NULL )
+ get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
+ spin_unlock(&iorp->lock);
+ }
+ domain_unpause(d);
break;
}
- d->arch.hvm_domain.params[a.index] = a.value;
- rc = 0;
+
+ if ( rc == 0 )
+ d->arch.hvm_domain.params[a.index] = a.value;
}
else
{
diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c
index 01c78f7799..493b7317b9 100644
--- a/xen/arch/x86/hvm/i8254.c
+++ b/xen/arch/x86/hvm/i8254.c
@@ -401,50 +401,6 @@ void pit_stop_channel0_irq(PITState *pit)
spin_unlock(&pit->lock);
}
-#ifdef HVM_DEBUG_SUSPEND
-static void pit_info(PITState *pit)
-{
- struct hvm_hw_pit_channel *s;
- struct periodic_time *pt;
- int i;
-
- for ( i = 0; i < 3; i++ )
- {
- printk("*****pit channel %d's state:*****\n", i);
- s = &pit->hw.channels[i];
- printk("pit 0x%x.\n", s->count);
- printk("pit 0x%x.\n", s->latched_count);
- printk("pit 0x%x.\n", s->count_latched);
- printk("pit 0x%x.\n", s->status_latched);
- printk("pit 0x%x.\n", s->status);
- printk("pit 0x%x.\n", s->read_state);
- printk("pit 0x%x.\n", s->write_state);
- printk("pit 0x%x.\n", s->write_latch);
- printk("pit 0x%x.\n", s->rw_mode);
- printk("pit 0x%x.\n", s->mode);
- printk("pit 0x%x.\n", s->bcd);
- printk("pit 0x%x.\n", s->gate);
- printk("pit %"PRId64"\n", pit->count_load_time[i]);
-
- }
-
- pt = &pit->pt0;
- printk("pit channel 0 periodic timer:\n", i);
- printk("pt %d.\n", pt->enabled);
- printk("pt %d.\n", pt->one_shot);
- printk("pt %d.\n", pt->irq);
- printk("pt %d.\n", pt->first_injected);
- printk("pt %d.\n", pt->pending_intr_nr);
- printk("pt %d.\n", pt->period);
- printk("pt %"PRId64"\n", pt->period_cycles);
- printk("pt %"PRId64"\n", pt->last_plt_gtime);
-}
-#else
-static void pit_info(PITState *pit)
-{
-}
-#endif
-
static int pit_save(struct domain *d, hvm_domain_context_t *h)
{
PITState *pit = domain_vpit(d);
@@ -452,9 +408,6 @@ static int pit_save(struct domain *d, hvm_domain_context_t *h)
spin_lock(&pit->lock);
- pit_info(pit);
-
- /* Save the PIT hardware state */
rc = hvm_save_entry(PIT, 0, h, &pit->hw);
spin_unlock(&pit->lock);
@@ -469,22 +422,21 @@ static int pit_load(struct domain *d, hvm_domain_context_t *h)
spin_lock(&pit->lock);
- /* Restore the PIT hardware state */
if ( hvm_load_entry(PIT, h, &pit->hw) )
{
spin_unlock(&pit->lock);
return 1;
}
- /* Recreate platform timers from hardware state. There will be some
+ /*
+ * Recreate platform timers from hardware state. There will be some
* time jitter here, but the wall-clock will have jumped massively, so
- * we hope the guest can handle it. */
+ * we hope the guest can handle it.
+ */
pit->pt0.last_plt_gtime = hvm_get_guest_time(d->vcpu[0]);
for ( i = 0; i < 3; i++ )
pit_load_count(pit, i, pit->hw.channels[i].count);
- pit_info(pit);
-
spin_unlock(&pit->lock);
return 0;
@@ -535,7 +487,7 @@ static int handle_pit_io(
if ( bytes != 1 )
{
gdprintk(XENLOG_WARNING, "PIT bad access\n");
- return 1;
+ return X86EMUL_OKAY;
}
if ( dir == IOREQ_WRITE )
@@ -550,7 +502,7 @@ static int handle_pit_io(
gdprintk(XENLOG_WARNING, "PIT: read A1:A0=3!\n");
}
- return 1;
+ return X86EMUL_OKAY;
}
static void speaker_ioport_write(
@@ -574,11 +526,7 @@ static int handle_speaker_io(
{
struct PITState *vpit = vcpu_vpit(current);
- if ( bytes != 1 )
- {
- gdprintk(XENLOG_WARNING, "PIT_SPEAKER bad access\n");
- return 1;
- }
+ BUG_ON(bytes != 1);
spin_lock(&vpit->lock);
@@ -589,7 +537,7 @@ static int handle_speaker_io(
spin_unlock(&vpit->lock);
- return 1;
+ return X86EMUL_OKAY;
}
int pv_pit_handler(int port, int data, int write)
diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c
index 04c5da7b6f..0e110e00dc 100644
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -45,53 +45,63 @@ static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
&vioapic_mmio_handler
};
-static inline void hvm_mmio_access(struct vcpu *v,
- ioreq_t *p,
- hvm_mmio_read_t read_handler,
- hvm_mmio_write_t write_handler)
+static int hvm_mmio_access(struct vcpu *v,
+ ioreq_t *p,
+ hvm_mmio_read_t read_handler,
+ hvm_mmio_write_t write_handler)
{
unsigned long data;
+ int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
- switch ( p->type )
+ if ( !p->data_is_ptr )
{
- case IOREQ_TYPE_COPY:
- if ( !p->data_is_ptr ) {
- if ( p->dir == IOREQ_READ )
- p->data = read_handler(v, p->addr, p->size);
- else /* p->dir == IOREQ_WRITE */
- write_handler(v, p->addr, p->size, p->data);
- } else { /* p->data_is_ptr */
- int i, sign = (p->df) ? -1 : 1;
-
- if ( p->dir == IOREQ_READ ) {
- for ( i = 0; i < p->count; i++ ) {
- data = read_handler(v,
- p->addr + (sign * i * p->size),
- p->size);
- (void)hvm_copy_to_guest_phys(
- p->data + (sign * i * p->size),
- &data,
- p->size);
- }
- } else {/* p->dir == IOREQ_WRITE */
- for ( i = 0; i < p->count; i++ ) {
- (void)hvm_copy_from_guest_phys(
- &data,
- p->data + (sign * i * p->size),
- p->size);
- write_handler(v,
- p->addr + (sign * i * p->size),
- p->size, data);
- }
- }
+ if ( p->dir == IOREQ_READ )
+ {
+ rc = read_handler(v, p->addr, p->size, &data);
+ p->data = data;
}
- break;
+ else /* p->dir == IOREQ_WRITE */
+ rc = write_handler(v, p->addr, p->size, p->data);
+ return rc;
+ }
- default:
- printk("hvm_mmio_access: error ioreq type %x\n", p->type);
- domain_crash_synchronous();
- break;
+ if ( p->dir == IOREQ_READ )
+ {
+ for ( i = 0; i < p->count; i++ )
+ {
+ rc = read_handler(
+ v,
+ p->addr + (sign * i * p->size),
+ p->size, &data);
+ if ( rc != X86EMUL_OKAY )
+ break;
+ (void)hvm_copy_to_guest_phys(
+ p->data + (sign * i * p->size),
+ &data,
+ p->size);
+ }
+ }
+ else
+ {
+ for ( i = 0; i < p->count; i++ )
+ {
+ (void)hvm_copy_from_guest_phys(
+ &data,
+ p->data + (sign * i * p->size),
+ p->size);
+ rc = write_handler(
+ v,
+ p->addr + (sign * i * p->size),
+ p->size, data);
+ if ( rc != X86EMUL_OKAY )
+ break;
+ }
}
+
+ if ( (p->count = i) != 0 )
+ rc = X86EMUL_OKAY;
+
+ return rc;
}
int hvm_mmio_intercept(ioreq_t *p)
@@ -100,60 +110,62 @@ int hvm_mmio_intercept(ioreq_t *p)
int i;
for ( i = 0; i < HVM_MMIO_HANDLER_NR; i++ )
- {
if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) )
- {
- hvm_mmio_access(v, p,
- hvm_mmio_handlers[i]->read_handler,
- hvm_mmio_handlers[i]->write_handler);
- return 1;
- }
- }
+ return hvm_mmio_access(
+ v, p,
+ hvm_mmio_handlers[i]->read_handler,
+ hvm_mmio_handlers[i]->write_handler);
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
- int rc = 1, i, sign = p->df ? -1 : 1;
+ int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
uint32_t data;
- if ( p->dir == IOREQ_READ )
+ if ( !p->data_is_ptr )
{
- if ( !p->data_is_ptr )
+ if ( p->dir == IOREQ_READ )
{
rc = action(IOREQ_READ, p->addr, p->size, &data);
p->data = data;
}
else
{
- for ( i = 0; i < p->count; i++ )
- {
- rc = action(IOREQ_READ, p->addr, p->size, &data);
- (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
- &data, p->size);
- }
+ data = p->data;
+ rc = action(IOREQ_WRITE, p->addr, p->size, &data);
}
+ return rc;
}
- else /* p->dir == IOREQ_WRITE */
+
+ if ( p->dir == IOREQ_READ )
{
- if ( !p->data_is_ptr )
+ for ( i = 0; i < p->count; i++ )
{
- data = p->data;
- rc = action(IOREQ_WRITE, p->addr, p->size, &data);
+ rc = action(IOREQ_READ, p->addr, p->size, &data);
+ if ( rc != X86EMUL_OKAY )
+ break;
+ (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
+ &data, p->size);
}
- else
+ }
+ else /* p->dir == IOREQ_WRITE */
+ {
+ for ( i = 0; i < p->count; i++ )
{
- for ( i = 0; i < p->count; i++ )
- {
- data = 0;
- (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size,
- p->size);
- rc = action(IOREQ_WRITE, p->addr, p->size, &data);
- }
+ data = 0;
+ (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size,
+ p->size);
+ rc = action(IOREQ_WRITE, p->addr, p->size, &data);
+ if ( rc != X86EMUL_OKAY )
+ break;
}
}
+ if ( (p->count = i) != 0 )
+ rc = X86EMUL_OKAY;
+
return rc;
}
@@ -170,7 +182,7 @@ int hvm_io_intercept(ioreq_t *p, int type)
unsigned long addr, size;
if ( (type == HVM_PORTIO) && (dpci_ioport_intercept(p)) )
- return 1;
+ return X86EMUL_OKAY;
for ( i = 0; i < handler->num_slot; i++ )
{
@@ -188,10 +200,10 @@ int hvm_io_intercept(ioreq_t *p, int type)
}
}
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
-int register_io_handler(
+void register_io_handler(
struct domain *d, unsigned long addr, unsigned long size,
void *action, int type)
{
@@ -207,9 +219,8 @@ int register_io_handler(
else
handler->hdl_list[num].action.mmio = action;
handler->num_slot++;
-
- return 1;
}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index ac1e62782a..6a8e0885c0 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -148,20 +148,19 @@ void send_timeoffset_req(unsigned long timeoff)
void send_invalidate_req(void)
{
struct vcpu *v = current;
- vcpu_iodata_t *vio;
+ vcpu_iodata_t *vio = get_ioreq(v);
ioreq_t *p;
- vio = get_ioreq(v);
- if ( vio == NULL )
- {
- printk("bad shared page: %lx\n", (unsigned long) vio);
- domain_crash_synchronous();
- }
+ BUG_ON(vio == NULL);
p = &vio->vp_ioreq;
if ( p->state != STATE_IOREQ_NONE )
- printk("WARNING: send invalidate req with something "
- "already pending (%d)?\n", p->state);
+ {
+ gdprintk(XENLOG_ERR, "WARNING: send invalidate req with something "
+ "already pending (%d)?\n", p->state);
+ domain_crash(v->domain);
+ return;
+ }
p->type = IOREQ_TYPE_INVALIDATE;
p->size = 4;
@@ -225,12 +224,6 @@ void hvm_io_assist(void)
ioreq_t *p = &get_ioreq(curr)->vp_ioreq;
enum hvm_io_state io_state;
- if ( p->state != STATE_IORESP_READY )
- {
- gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state);
- domain_crash_synchronous();
- }
-
rmb(); /* see IORESP_READY /then/ read contents of ioreq */
p->state = STATE_IOREQ_NONE;
@@ -253,74 +246,59 @@ void hvm_io_assist(void)
void dpci_ioport_read(uint32_t mport, ioreq_t *p)
{
- uint64_t i;
- uint64_t z_data;
- uint64_t length = (p->count * p->size);
+ int i, sign = p->df ? -1 : 1;
+ uint32_t data = 0;
- for ( i = 0; i < length; i += p->size )
+ for ( i = 0; i < p->count; i++ )
{
- z_data = ~0ULL;
-
switch ( p->size )
{
case 1:
- z_data = (uint64_t)inb(mport);
+ data = inb(mport);
break;
case 2:
- z_data = (uint64_t)inw(mport);
+ data = inw(mport);
break;
case 4:
- z_data = (uint64_t)inl(mport);
+ data = inl(mport);
break;
default:
- gdprintk(XENLOG_ERR, "Error: unable to handle size: %"
- PRId64 "\n", p->size);
- return;
+ BUG();
}
- p->data = z_data;
- if ( p->data_is_ptr &&
- hvm_copy_to_guest_phys(p->data + i, (void *)&z_data,
- (int)p->size) )
- {
- gdprintk(XENLOG_ERR, "Error: couldn't copy to hvm phys\n");
- return;
- }
+ if ( p->data_is_ptr )
+ (void)hvm_copy_to_guest_phys(
+ p->data + (sign * i * p->size), &data, p->size);
+ else
+ p->data = data;
}
}
void dpci_ioport_write(uint32_t mport, ioreq_t *p)
{
- uint64_t i;
- uint64_t z_data = 0;
- uint64_t length = (p->count * p->size);
+ int i, sign = p->df ? -1 : 1;
+ uint32_t data;
- for ( i = 0; i < length; i += p->size )
+ for ( i = 0; i < p->count; i++ )
{
- z_data = p->data;
- if ( p->data_is_ptr &&
- hvm_copy_from_guest_phys((void *)&z_data,
- p->data + i, (int)p->size) )
- {
- gdprintk(XENLOG_ERR, "Error: couldn't copy from hvm phys\n");
- return;
- }
+ data = p->data;
+ if ( p->data_is_ptr )
+ (void)hvm_copy_from_guest_phys(
+ &data, p->data + (sign * i * p->size), p->size);
switch ( p->size )
{
case 1:
- outb((uint8_t) z_data, mport);
+ outb(data, mport);
break;
case 2:
- outw((uint16_t) z_data, mport);
+ outw(data, mport);
break;
case 4:
- outl((uint32_t) z_data, mport);
+ outl(data, mport);
break;
default:
- gdprintk(XENLOG_ERR, "Error: unable to handle size: %"
- PRId64 "\n", p->size);
- break;
+ BUG();
}
}
}
diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
index 3bd0dc9d7c..4e50680022 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -266,7 +266,7 @@ static void setup_var_mtrrs(struct vcpu *v)
{
if ( e820_table[i].addr == 0x100000 )
{
- size = e820_table[i].size + 0x100000 + PAGE_SIZE * 4;
+ size = e820_table[i].size + 0x100000 + PAGE_SIZE * 5;
addr = 0;
}
else
diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c
index 8d3fff8f44..4924a80687 100644
--- a/xen/arch/x86/hvm/pmtimer.c
+++ b/xen/arch/x86/hvm/pmtimer.c
@@ -169,7 +169,7 @@ static int handle_evt_io(
spin_unlock(&s->lock);
- return 1;
+ return X86EMUL_OKAY;
}
@@ -183,7 +183,7 @@ static int handle_pmt_io(
if ( bytes != 4 )
{
gdprintk(XENLOG_WARNING, "HVM_PMT bad access\n");
- return 1;
+ return X86EMUL_OKAY;
}
if ( dir == IOREQ_READ )
@@ -192,10 +192,10 @@ static int handle_pmt_io(
pmt_update_time(s);
*val = s->pm.tmr_val;
spin_unlock(&s->lock);
- return 1;
+ return X86EMUL_OKAY;
}
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
static int pmtimer_save(struct domain *d, hvm_domain_context_t *h)
diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
index b9e4b4a241..e196c72866 100644
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -403,21 +403,21 @@ static int handle_rtc_io(
if ( bytes != 1 )
{
gdprintk(XENLOG_WARNING, "HVM_RTC bas access\n");
- return 1;
+ return X86EMUL_OKAY;
}
if ( dir == IOREQ_WRITE )
{
if ( rtc_ioport_write(vrtc, port, (uint8_t)*val) )
- return 1;
+ return X86EMUL_OKAY;
}
else if ( vrtc->hw.cmos_index < RTC_CMOS_SIZE )
{
*val = rtc_ioport_read(vrtc, port);
- return 1;
+ return X86EMUL_OKAY;
}
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
void rtc_migrate_timers(struct vcpu *v)
diff --git a/xen/arch/x86/hvm/stdvga.c b/xen/arch/x86/hvm/stdvga.c
index 56260c5c77..25b16bddac 100644
--- a/xen/arch/x86/hvm/stdvga.c
+++ b/xen/arch/x86/hvm/stdvga.c
@@ -32,6 +32,7 @@
#include <xen/sched.h>
#include <xen/domain_page.h>
#include <asm/hvm/support.h>
+#include <xen/numa.h>
#define PAT(x) (x)
static const uint32_t mask16[16] = {
@@ -166,19 +167,19 @@ static void stdvga_out(uint32_t port, uint32_t bytes, uint32_t val)
}
}
-int stdvga_intercept_pio(
+static int stdvga_intercept_pio(
int dir, uint32_t port, uint32_t bytes, uint32_t *val)
{
struct hvm_hw_stdvga *s = &current->domain->arch.hvm_domain.stdvga;
- if ( dir == IOREQ_READ )
- return 0;
-
- spin_lock(&s->lock);
- stdvga_out(port, bytes, *val);
- spin_unlock(&s->lock);
+ if ( dir == IOREQ_WRITE )
+ {
+ spin_lock(&s->lock);
+ stdvga_out(port, bytes, *val);
+ spin_unlock(&s->lock);
+ }
- return 0; /* propagate to external ioemu */
+ return X86EMUL_UNHANDLEABLE; /* propagate to external ioemu */
}
#define GET_PLANE(data, p) (((data) >> ((p) * 8)) & 0xff)
@@ -458,7 +459,7 @@ static int mmio_move(struct hvm_hw_stdvga *s, ioreq_t *p)
return 1;
}
-int stdvga_intercept_mmio(ioreq_t *p)
+static int stdvga_intercept_mmio(ioreq_t *p)
{
struct domain *d = current->domain;
struct hvm_hw_stdvga *s = &d->arch.hvm_domain.stdvga;
@@ -467,7 +468,7 @@ int stdvga_intercept_mmio(ioreq_t *p)
if ( p->size > 8 )
{
gdprintk(XENLOG_WARNING, "invalid mmio size %d\n", (int)p->size);
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
spin_lock(&s->lock);
@@ -498,7 +499,7 @@ int stdvga_intercept_mmio(ioreq_t *p)
spin_unlock(&s->lock);
- return rc;
+ return rc ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
}
void stdvga_init(struct domain *d)
@@ -513,7 +514,8 @@ void stdvga_init(struct domain *d)
for ( i = 0; i != ARRAY_SIZE(s->vram_page); i++ )
{
- if ( (pg = alloc_domheap_page(NULL)) == NULL )
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( pg == NULL )
break;
s->vram_page[i] = pg;
p = map_domain_page(page_to_mfn(pg));
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index be166a868c..7c10127966 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -255,11 +255,6 @@ static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
svm_update_guest_cr(v, 2);
svm_update_guest_cr(v, 4);
-#ifdef HVM_DEBUG_SUSPEND
- printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__, c->cr3, c->cr0, c->cr4);
-#endif
-
vmcb->sysenter_cs = c->sysenter_cs;
vmcb->sysenter_esp = c->sysenter_esp;
vmcb->sysenter_eip = c->sysenter_eip;
@@ -472,7 +467,7 @@ static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- ASSERT(v == current);
+ ASSERT((v == current) || !vcpu_runnable(v));
switch ( seg )
{
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index c01618c69f..8ebaa260cf 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -88,9 +88,9 @@ static unsigned long vioapic_read_indirect(struct hvm_hw_vioapic *vioapic,
return result;
}
-static unsigned long vioapic_read(struct vcpu *v,
- unsigned long addr,
- unsigned long length)
+static int vioapic_read(
+ struct vcpu *v, unsigned long addr,
+ unsigned long length, unsigned long *pval)
{
struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);
uint32_t result;
@@ -114,11 +114,13 @@ static unsigned long vioapic_read(struct vcpu *v,
break;
}
- return result;
+ *pval = result;
+ return X86EMUL_OKAY;
}
static void vioapic_write_redirent(
- struct hvm_hw_vioapic *vioapic, unsigned int idx, int top_word, uint32_t val)
+ struct hvm_hw_vioapic *vioapic, unsigned int idx,
+ int top_word, uint32_t val)
{
struct domain *d = vioapic_domain(vioapic);
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
@@ -196,10 +198,9 @@ static void vioapic_write_indirect(
}
}
-static void vioapic_write(struct vcpu *v,
- unsigned long addr,
- unsigned long length,
- unsigned long val)
+static int vioapic_write(
+ struct vcpu *v, unsigned long addr,
+ unsigned long length, unsigned long val)
{
struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);
@@ -224,6 +225,8 @@ static void vioapic_write(struct vcpu *v,
default:
break;
}
+
+ return X86EMUL_OKAY;
}
static int vioapic_range(struct vcpu *v, unsigned long addr)
@@ -477,45 +480,16 @@ void vioapic_update_EOI(struct domain *d, int vector)
spin_unlock(&d->arch.hvm_domain.irq_lock);
}
-#ifdef HVM_DEBUG_SUSPEND
-static void ioapic_info(struct hvm_hw_vioapic *s)
-{
- int i;
- printk("*****ioapic state:*****\n");
- printk("ioapic 0x%x.\n", s->ioregsel);
- printk("ioapic 0x%x.\n", s->id);
- printk("ioapic 0x%lx.\n", s->base_address);
- for (i = 0; i < VIOAPIC_NUM_PINS; i++) {
- printk("ioapic redirtbl[%d]:0x%"PRIx64"\n", i, s->redirtbl[i].bits);
- }
-
-}
-#else
-static void ioapic_info(struct hvm_hw_vioapic *s)
-{
-}
-#endif
-
-
static int ioapic_save(struct domain *d, hvm_domain_context_t *h)
{
struct hvm_hw_vioapic *s = domain_vioapic(d);
- ioapic_info(s);
-
- /* save io-apic state*/
- return ( hvm_save_entry(IOAPIC, 0, h, s) );
+ return hvm_save_entry(IOAPIC, 0, h, s);
}
static int ioapic_load(struct domain *d, hvm_domain_context_t *h)
{
struct hvm_hw_vioapic *s = domain_vioapic(d);
-
- /* restore ioapic state */
- if ( hvm_load_entry(IOAPIC, h, s) != 0 )
- return -EINVAL;
-
- ioapic_info(s);
- return 0;
+ return hvm_load_entry(IOAPIC, h, s);
}
HVM_REGISTER_SAVE_RESTORE(IOAPIC, ioapic_save, ioapic_load, 1, HVMSR_PER_DOM);
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index bf53ba7a1a..9bfc2cc3d1 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -33,6 +33,7 @@
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/hvm/vmx/vmx.h>
+#include <xen/numa.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>
@@ -240,12 +241,145 @@ static int vlapic_match_dest(struct vcpu *v, struct vlapic *source,
return result;
}
+static int vlapic_vcpu_pause_async(struct vcpu *v)
+{
+ vcpu_pause_nosync(v);
+
+ if ( v->is_running )
+ {
+ vcpu_unpause(v);
+ return 0;
+ }
+
+ sync_vcpu_execstate(v);
+ return 1;
+}
+
+static void vlapic_init_action(unsigned long _vcpu)
+{
+ struct vcpu *v = (struct vcpu *)_vcpu;
+ struct domain *d = v->domain;
+
+ /* If the VCPU is not on its way down we have nothing to do. */
+ if ( !test_bit(_VPF_down, &v->pause_flags) )
+ return;
+
+ if ( !vlapic_vcpu_pause_async(v) )
+ {
+ tasklet_schedule(&vcpu_vlapic(v)->init_tasklet);
+ return;
+ }
+
+ domain_lock(d);
+
+ /* Paranoia makes us re-assert VPF_down under the domain lock. */
+ set_bit(_VPF_down, &v->pause_flags);
+ v->is_initialised = 0;
+ clear_bit(_VPF_blocked, &v->pause_flags);
+
+ vlapic_reset(vcpu_vlapic(v));
+
+ domain_unlock(d);
+
+ vcpu_unpause(v);
+}
+
+static int vlapic_accept_init(struct vcpu *v)
+{
+ /* Nothing to do if the VCPU is already reset. */
+ if ( !v->is_initialised )
+ return X86EMUL_OKAY;
+
+ /* Asynchronously take the VCPU down and schedule reset work. */
+ hvm_vcpu_down(v);
+ tasklet_schedule(&vcpu_vlapic(v)->init_tasklet);
+ return X86EMUL_RETRY;
+}
+
+static int vlapic_accept_sipi(struct vcpu *v, int trampoline_vector)
+{
+ struct domain *d = current->domain;
+ struct vcpu_guest_context *ctxt;
+ struct segment_register reg;
+
+ /* If the VCPU is not on its way down we have nothing to do. */
+ if ( !test_bit(_VPF_down, &v->pause_flags) )
+ return X86EMUL_OKAY;
+
+ if ( !vlapic_vcpu_pause_async(v) )
+ return X86EMUL_RETRY;
+
+ domain_lock(d);
+
+ if ( v->is_initialised )
+ goto out;
+
+ ctxt = &v->arch.guest_context;
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->flags = VGCF_online;
+ ctxt->user_regs.eflags = 2;
+
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
+ hvm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[2] = 0;
+ hvm_update_guest_cr(v, 2);
+
+ v->arch.hvm_vcpu.guest_cr[3] = 0;
+ hvm_update_guest_cr(v, 3);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);
+
+ v->arch.hvm_vcpu.guest_efer = 0;
+ hvm_update_guest_efer(v);
+
+ reg.sel = trampoline_vector << 8;
+ reg.base = (uint32_t)reg.sel << 4;
+ reg.limit = 0xffff;
+ reg.attr.bytes = 0x89b;
+ hvm_set_segment_register(v, x86_seg_cs, &reg);
+
+ reg.sel = reg.base = 0;
+ reg.limit = 0xffff;
+ reg.attr.bytes = 0x893;
+ hvm_set_segment_register(v, x86_seg_ds, &reg);
+ hvm_set_segment_register(v, x86_seg_es, &reg);
+ hvm_set_segment_register(v, x86_seg_fs, &reg);
+ hvm_set_segment_register(v, x86_seg_gs, &reg);
+ hvm_set_segment_register(v, x86_seg_ss, &reg);
+
+ reg.attr.bytes = 0x82; /* LDT */
+ hvm_set_segment_register(v, x86_seg_ldtr, &reg);
+
+ reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
+ hvm_set_segment_register(v, x86_seg_tr, &reg);
+
+ reg.attr.bytes = 0;
+ hvm_set_segment_register(v, x86_seg_gdtr, &reg);
+ hvm_set_segment_register(v, x86_seg_idtr, &reg);
+
+ /* Sync AP's TSC with BSP's. */
+ v->arch.hvm_vcpu.cache_tsc_offset =
+ v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+
+ v->arch.flags |= TF_kernel_mode;
+ v->is_initialised = 1;
+ clear_bit(_VPF_down, &v->pause_flags);
+
+ out:
+ domain_unlock(d);
+ vcpu_unpause(v);
+ return X86EMUL_OKAY;
+}
+
/* Add a pending IRQ into lapic. */
static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
int vector, int level, int trig_mode)
{
- int result = 0;
struct vlapic *vlapic = vcpu_vlapic(v);
+ int rc = X86EMUL_OKAY;
switch ( delivery_mode )
{
@@ -270,8 +404,6 @@ static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
}
vcpu_kick(v);
-
- result = 1;
break;
case APIC_DM_REMRD:
@@ -291,43 +423,20 @@ static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
/* No work on INIT de-assert for P4-type APIC. */
if ( trig_mode && !(level & APIC_INT_ASSERT) )
break;
- /* FIXME How to check the situation after vcpu reset? */
- if ( v->is_initialised )
- hvm_vcpu_reset(v);
- v->arch.hvm_vcpu.init_sipi_sipi_state =
- HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI;
- result = 1;
+ rc = vlapic_accept_init(v);
break;
case APIC_DM_STARTUP:
- if ( v->arch.hvm_vcpu.init_sipi_sipi_state ==
- HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM )
- break;
-
- v->arch.hvm_vcpu.init_sipi_sipi_state =
- HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM;
-
- if ( v->is_initialised )
- {
- gdprintk(XENLOG_ERR, "SIPI for initialized vcpu %x\n", v->vcpu_id);
- goto exit_and_crash;
- }
-
- if ( hvm_bringup_ap(v->vcpu_id, vector) != 0 )
- result = 0;
+ rc = vlapic_accept_sipi(v, vector);
break;
default:
gdprintk(XENLOG_ERR, "TODO: unsupported delivery mode %x\n",
delivery_mode);
- goto exit_and_crash;
+ domain_crash(v->domain);
}
- return result;
-
- exit_and_crash:
- domain_crash(v->domain);
- return 0;
+ return rc;
}
/* This function is used by both ioapic and lapic.The bitmap is for vcpu_id. */
@@ -369,11 +478,9 @@ void vlapic_EOI_set(struct vlapic *vlapic)
vioapic_update_EOI(vlapic_domain(vlapic), vector);
}
-static void vlapic_ipi(struct vlapic *vlapic)
+static int vlapic_ipi(
+ struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
{
- uint32_t icr_low = vlapic_get_reg(vlapic, APIC_ICR);
- uint32_t icr_high = vlapic_get_reg(vlapic, APIC_ICR2);
-
unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
unsigned int short_hand = icr_low & APIC_SHORT_MASK;
unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
@@ -385,6 +492,7 @@ static void vlapic_ipi(struct vlapic *vlapic)
struct vlapic *target;
struct vcpu *v;
uint32_t lpr_map = 0;
+ int rc = X86EMUL_OKAY;
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr_high 0x%x, icr_low 0x%x, "
"short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
@@ -399,18 +507,23 @@ static void vlapic_ipi(struct vlapic *vlapic)
if ( delivery_mode == APIC_DM_LOWEST )
__set_bit(v->vcpu_id, &lpr_map);
else
- vlapic_accept_irq(v, delivery_mode,
- vector, level, trig_mode);
+ rc = vlapic_accept_irq(v, delivery_mode,
+ vector, level, trig_mode);
}
+
+ if ( rc != X86EMUL_OKAY )
+ break;
}
if ( delivery_mode == APIC_DM_LOWEST )
{
target = apic_round_robin(vlapic_domain(v), vector, lpr_map);
if ( target != NULL )
- vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
- vector, level, trig_mode);
+ rc = vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
+ vector, level, trig_mode);
}
+
+ return rc;
}
static uint32_t vlapic_get_tmcct(struct vlapic *vlapic)
@@ -465,17 +578,18 @@ static void vlapic_read_aligned(
}
}
-static unsigned long vlapic_read(struct vcpu *v, unsigned long address,
- unsigned long len)
+static int vlapic_read(
+ struct vcpu *v, unsigned long address,
+ unsigned long len, unsigned long *pval)
{
unsigned int alignment;
unsigned int tmp;
- unsigned long result;
+ unsigned long result = 0;
struct vlapic *vlapic = vcpu_vlapic(v);
unsigned int offset = address - vlapic_base_address(vlapic);
if ( offset > (APIC_TDCR + 0x3) )
- return 0;
+ goto out;
alignment = offset & 0x3;
@@ -507,14 +621,16 @@ static unsigned long vlapic_read(struct vcpu *v, unsigned long address,
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "offset 0x%x with length 0x%lx, "
"and the result is 0x%lx", offset, len, result);
- return result;
+ out:
+ *pval = result;
+ return X86EMUL_OKAY;
unaligned_exit_and_crash:
gdprintk(XENLOG_ERR, "Unaligned LAPIC read len=0x%lx at offset=0x%x.\n",
len, offset);
exit_and_crash:
domain_crash(v->domain);
- return 0;
+ return X86EMUL_OKAY;
}
void vlapic_pt_cb(struct vcpu *v, void *data)
@@ -522,11 +638,12 @@ void vlapic_pt_cb(struct vcpu *v, void *data)
*(s_time_t *)data = hvm_get_guest_time(v);
}
-static void vlapic_write(struct vcpu *v, unsigned long address,
- unsigned long len, unsigned long val)
+static int vlapic_write(struct vcpu *v, unsigned long address,
+ unsigned long len, unsigned long val)
{
struct vlapic *vlapic = vcpu_vlapic(v);
unsigned int offset = address - vlapic_base_address(vlapic);
+ int rc = X86EMUL_OKAY;
if ( offset != 0xb0 )
HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
@@ -540,13 +657,13 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
val = (uint32_t)val;
if ( len != 4 )
{
- unsigned int tmp;
+ unsigned long tmp;
unsigned char alignment;
gdprintk(XENLOG_INFO, "Notice: Local APIC write with len = %lx\n",len);
alignment = offset & 0x3;
- tmp = vlapic_read(v, offset & ~0x3, 4);
+ (void)vlapic_read(v, offset & ~0x3, 4, &tmp);
switch ( len )
{
@@ -617,9 +734,10 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
break;
case APIC_ICR:
- /* No delay here, so we always clear the pending bit*/
- vlapic_set_reg(vlapic, APIC_ICR, val & ~(1 << 12));
- vlapic_ipi(vlapic);
+ val &= ~(1 << 12); /* always clear the pending bit */
+ rc = vlapic_ipi(vlapic, val, vlapic_get_reg(vlapic, APIC_ICR2));
+ if ( rc == X86EMUL_OKAY )
+ vlapic_set_reg(vlapic, APIC_ICR, val);
break;
case APIC_ICR2:
@@ -669,13 +787,14 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
break;
}
- return;
+ return rc;
unaligned_exit_and_crash:
gdprintk(XENLOG_ERR, "Unaligned LAPIC write len=0x%lx at offset=0x%x.\n",
len, offset);
exit_and_crash:
domain_crash(v->domain);
+ return rc;
}
static int vlapic_range(struct vcpu *v, unsigned long addr)
@@ -788,77 +907,58 @@ void vlapic_reset(struct vlapic *vlapic)
vlapic_set_reg(vlapic, APIC_SPIV, 0xff);
vlapic->hw.disabled |= VLAPIC_SW_DISABLED;
-}
-#ifdef HVM_DEBUG_SUSPEND
-static void lapic_info(struct vlapic *s)
-{
- printk("*****lapic state:*****\n");
- printk("lapic 0x%"PRIx64".\n", s->hw.apic_base_msr);
- printk("lapic 0x%x.\n", s->hw.disabled);
- printk("lapic 0x%x.\n", s->hw.timer_divisor);
-}
-#else
-static void lapic_info(struct vlapic *s)
-{
+ destroy_periodic_time(&vlapic->pt);
}
-#endif
/* rearm the actimer if needed, after a HVM restore */
static void lapic_rearm(struct vlapic *s)
{
- unsigned long tmict;
+ unsigned long tmict = vlapic_get_reg(s, APIC_TMICT);
+ uint64_t period;
- tmict = vlapic_get_reg(s, APIC_TMICT);
- if ( tmict > 0 )
- {
- uint64_t period = (uint64_t)APIC_BUS_CYCLE_NS *
- (uint32_t)tmict * s->hw.timer_divisor;
- uint32_t lvtt = vlapic_get_reg(s, APIC_LVTT);
-
- s->pt.irq = lvtt & APIC_VECTOR_MASK;
- create_periodic_time(vlapic_vcpu(s), &s->pt, period, s->pt.irq,
- !vlapic_lvtt_period(s), vlapic_pt_cb,
- &s->timer_last_update);
- s->timer_last_update = s->pt.last_plt_gtime;
-
- printk("lapic_load to rearm the actimer:"
- "bus cycle is %uns, "
- "saved tmict count %lu, period %"PRIu64"ns, irq=%"PRIu8"\n",
- APIC_BUS_CYCLE_NS, tmict, period, s->pt.irq);
- }
+ if ( (tmict = vlapic_get_reg(s, APIC_TMICT)) == 0 )
+ return;
- lapic_info(s);
+ period = ((uint64_t)APIC_BUS_CYCLE_NS *
+ (uint32_t)tmict * s->hw.timer_divisor);
+ s->pt.irq = vlapic_get_reg(s, APIC_LVTT) & APIC_VECTOR_MASK;
+ create_periodic_time(vlapic_vcpu(s), &s->pt, period, s->pt.irq,
+ !vlapic_lvtt_period(s), vlapic_pt_cb,
+ &s->timer_last_update);
+ s->timer_last_update = s->pt.last_plt_gtime;
}
static int lapic_save_hidden(struct domain *d, hvm_domain_context_t *h)
{
struct vcpu *v;
struct vlapic *s;
+ int rc = 0;
- for_each_vcpu(d, v)
+ for_each_vcpu ( d, v )
{
s = vcpu_vlapic(v);
- lapic_info(s);
-
- if ( hvm_save_entry(LAPIC, v->vcpu_id, h, &s->hw) != 0 )
- return 1;
+ if ( (rc = hvm_save_entry(LAPIC, v->vcpu_id, h, &s->hw)) != 0 )
+ break;
}
- return 0;
+
+ return rc;
}
static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
{
struct vcpu *v;
struct vlapic *s;
+ int rc = 0;
- for_each_vcpu(d, v)
+ for_each_vcpu ( d, v )
{
s = vcpu_vlapic(v);
- if ( hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs) != 0 )
- return 1;
+ if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
+ break;
}
- return 0;
+
+ return rc;
}
static int lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
@@ -879,8 +979,6 @@ static int lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
if ( hvm_load_entry(LAPIC, h, &s->hw) != 0 )
return -EINVAL;
- lapic_info(s);
-
vmx_vlapic_msr_changed(v);
return 0;
@@ -916,7 +1014,7 @@ HVM_REGISTER_SAVE_RESTORE(LAPIC_REGS, lapic_save_regs, lapic_load_regs,
int vlapic_init(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
- unsigned int memflags = 0;
+ unsigned int memflags = MEMF_node(vcpu_to_node(v));
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "%d", v->vcpu_id);
@@ -925,10 +1023,10 @@ int vlapic_init(struct vcpu *v)
#ifdef __i386__
/* 32-bit VMX may be limited to 32-bit physical addresses. */
if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
- memflags = MEMF_bits(32);
+ memflags |= MEMF_bits(32);
#endif
- vlapic->regs_page = alloc_domheap_pages(NULL, 0, memflags);
+ vlapic->regs_page = alloc_domheap_page(NULL, memflags);
if ( vlapic->regs_page == NULL )
{
dprintk(XENLOG_ERR, "alloc vlapic regs error: %d/%d\n",
@@ -941,7 +1039,7 @@ int vlapic_init(struct vcpu *v)
{
dprintk(XENLOG_ERR, "map vlapic regs error: %d/%d\n",
v->domain->domain_id, v->vcpu_id);
- return -ENOMEM;
+ return -ENOMEM;
}
clear_page(vlapic->regs);
@@ -953,6 +1051,8 @@ int vlapic_init(struct vcpu *v)
if ( v->vcpu_id == 0 )
vlapic->hw.apic_base_msr |= MSR_IA32_APICBASE_BSP;
+ tasklet_init(&vlapic->init_tasklet, vlapic_init_action, (unsigned long)v);
+
return 0;
}
@@ -960,6 +1060,7 @@ void vlapic_destroy(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
+ tasklet_kill(&vlapic->init_tasklet);
destroy_periodic_time(&vlapic->pt);
unmap_domain_page_global(vlapic->regs);
free_domheap_page(vlapic->regs_page);
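
For reference, lapic_rearm() above folds the saved TMICT count and the timer divisor into a period in nanoseconds before re-creating the periodic timer. A stand-alone sketch of that arithmetic follows; the APIC_BUS_CYCLE_NS value used here is an assumption for the example rather than the hypervisor's definition.

    /* Illustrative only: derive a virtual LAPIC timer period from the
     * initial-count (TMICT) register and the divide configuration. */
    #include <stdint.h>
    #include <stdio.h>

    #define APIC_BUS_CYCLE_NS 10   /* assumed bus cycle length, in ns */

    static uint64_t lapic_timer_period_ns(uint32_t tmict, uint32_t divisor)
    {
        /* One timer tick lasts one bus cycle times the divide value. */
        return (uint64_t)APIC_BUS_CYCLE_NS * tmict * divisor;
    }

    int main(void)
    {
        /* e.g. TMICT = 100000 ticks with divide-by-16 */
        printf("period = %llu ns\n",
               (unsigned long long)lapic_timer_period_ns(100000, 16));
        return 0;
    }
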
diff --git a/xen/arch/x86/hvm/vmx/realmode.c b/xen/arch/x86/hvm/vmx/realmode.c
index c00e8b1e42..5d13f4e60b 100644
--- a/xen/arch/x86/hvm/vmx/realmode.c
+++ b/xen/arch/x86/hvm/vmx/realmode.c
@@ -172,7 +172,7 @@ static void realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt)
hvmemul_ctxt->insn_buf[0], hvmemul_ctxt->insn_buf[1],
hvmemul_ctxt->insn_buf[2], hvmemul_ctxt->insn_buf[3],
hvmemul_ctxt->insn_buf[4], hvmemul_ctxt->insn_buf[5]);
- domain_crash_synchronous();
+ domain_crash(curr->domain);
}
void vmx_realmode(struct cpu_user_regs *regs)
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index bee9eb1deb..48506c5b32 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -38,6 +38,9 @@
#include <asm/shadow.h>
#include <asm/tboot.h>
+static int opt_vpid_enabled = 1;
+boolean_param("vpid", opt_vpid_enabled);
+
/* Dynamic (run-time adjusted) execution control flags. */
u32 vmx_pin_based_exec_control __read_mostly;
u32 vmx_cpu_based_exec_control __read_mostly;
@@ -84,14 +87,16 @@ static void vmx_init_vmcs_config(void)
min = (CPU_BASED_HLT_EXITING |
CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_ACTIVATE_IO_BITMAP |
CPU_BASED_USE_TSC_OFFSETING);
- opt = CPU_BASED_ACTIVATE_MSR_BITMAP;
- opt |= CPU_BASED_TPR_SHADOW;
- opt |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
+ CPU_BASED_TPR_SHADOW |
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
_vmx_cpu_based_exec_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
#ifdef __x86_64__
@@ -107,11 +112,25 @@ static void vmx_init_vmcs_config(void)
{
min = 0;
opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
- SECONDARY_EXEC_WBINVD_EXITING);
+ SECONDARY_EXEC_WBINVD_EXITING |
+ SECONDARY_EXEC_ENABLE_EPT);
+ if ( opt_vpid_enabled )
+ opt |= SECONDARY_EXEC_ENABLE_VPID;
_vmx_secondary_exec_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
}
+ if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
+ {
+ /* To use EPT we expect to be able to clear certain intercepts. */
+ uint32_t must_be_one, must_be_zero;
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, must_be_one, must_be_zero);
+ if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING) )
+ _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ }
+
#if defined(__i386__)
/* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
if ( !(_vmx_secondary_exec_control &
@@ -301,6 +320,10 @@ int vmx_cpu_up(void)
return 0;
}
+ ept_sync_all();
+
+ vpid_sync_all();
+
return 1;
}
@@ -439,6 +462,7 @@ void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
static int construct_vmcs(struct vcpu *v)
{
+ struct domain *d = v->domain;
uint16_t sysenter_cs;
unsigned long sysenter_eip;
@@ -448,10 +472,25 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
__vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
__vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
- __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
+
v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
- if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
- __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
+ v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+
+ if ( paging_mode_hap(d) )
+ {
+ v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
+ }
+ else
+ {
+ v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ }
+
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+ if ( cpu_has_vmx_secondary_exec_control )
+ __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+ v->arch.hvm_vmx.secondary_exec_control);
/* MSR access bitmap. */
if ( cpu_has_vmx_msr_bitmap )
@@ -570,9 +609,10 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif
- __vmwrite(EXCEPTION_BITMAP, (HVM_TRAP_MASK |
- (1U << TRAP_page_fault) |
- (1U << TRAP_no_device)));
+ __vmwrite(EXCEPTION_BITMAP,
+ HVM_TRAP_MASK
+ | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
+ | (1U << TRAP_no_device));
v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
hvm_update_guest_cr(v, 0);
@@ -587,6 +627,22 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(TPR_THRESHOLD, 0);
}
+ if ( paging_mode_hap(d) )
+ {
+ __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp);
+#ifdef CONFIG_X86_PAE
+ __vmwrite(EPT_POINTER_HIGH,
+ d->arch.hvm_domain.vmx.ept_control.eptp >> 32);
+#endif
+ }
+
+ if ( cpu_has_vmx_vpid )
+ {
+ v->arch.hvm_vmx.vpid =
+ v->domain->arch.hvm_domain.vmx.vpid_base + v->vcpu_id;
+ __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
+ }
+
vmx_vmcs_exit(v);
paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
@@ -729,14 +785,14 @@ void vmx_destroy_vmcs(struct vcpu *v)
arch_vmx->vmcs = NULL;
}
-void vm_launch_fail(unsigned long eflags)
+void vm_launch_fail(void)
{
unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
printk("<vm_launch_fail> error code %lx\n", error);
domain_crash_synchronous();
}
-void vm_resume_fail(unsigned long eflags)
+void vm_resume_fail(void)
{
unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
printk("<vm_resume_fail> error code %lx\n", error);
@@ -780,6 +836,7 @@ void vmx_do_resume(struct vcpu *v)
vmx_load_vmcs(v);
hvm_migrate_timers(v);
vmx_set_host_env(v);
+ vpid_sync_vcpu_all(v);
}
debug_state = v->domain->debugger_attached;
@@ -932,6 +989,10 @@ void vmcs_dump_vcpu(struct vcpu *v)
(uint32_t)vmr(IDT_VECTORING_ERROR_CODE));
printk("TPR Threshold = 0x%02x\n",
(uint32_t)vmr(TPR_THRESHOLD));
+ printk("EPT pointer = 0x%08x%08x\n",
+ (uint32_t)vmr(EPT_POINTER_HIGH), (uint32_t)vmr(EPT_POINTER));
+ printk("Virtual processor ID = 0x%04x\n",
+ (uint32_t)vmr(VIRTUAL_PROCESSOR_ID));
vmx_vmcs_exit(v);
}
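
The EPT check added above reads MSR_IA32_VMX_PROCBASED_CTLS and disables EPT if any of the CR3/INVLPG intercepts turns out to be a required ("must be one") bit. For background, here is a hedged sketch of the usual reconciliation of minimum and optional controls against such a capability MSR; the helper name is invented and this is not the adjust_vmx_controls() used by this file.

    #include <stdint.h>

    /*
     * A VMX capability MSR encodes, in its low half, control bits that must
     * be set and, in its high half, control bits that are allowed to be set.
     */
    uint32_t reconcile_vmx_controls(uint32_t min, uint32_t opt, uint64_t cap_msr)
    {
        uint32_t must_be_one = (uint32_t)cap_msr;          /* allowed-0 half */
        uint32_t allowed_one = (uint32_t)(cap_msr >> 32);  /* allowed-1 half */
        uint32_t ctl = min | opt;

        ctl &= allowed_one;   /* drop optional bits the CPU cannot set */
        ctl |= must_be_one;   /* force bits the CPU requires to be set */

        /* A caller would bail out if a bit from 'min' was dropped here. */
        return ctl;
    }
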
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 29dcb68503..628cbddfcf 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -57,6 +57,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
static int vmx_alloc_vlapic_mapping(struct domain *d);
static void vmx_free_vlapic_mapping(struct domain *d);
+static int vmx_alloc_vpid(struct domain *d);
+static void vmx_free_vpid(struct domain *d);
static void vmx_install_vlapic_mapping(struct vcpu *v);
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
static void vmx_update_guest_efer(struct vcpu *v);
@@ -71,12 +73,30 @@ static void vmx_invlpg_intercept(unsigned long vaddr);
static int vmx_domain_initialise(struct domain *d)
{
- return vmx_alloc_vlapic_mapping(d);
+ int rc;
+
+ d->arch.hvm_domain.vmx.ept_control.etmt = EPT_DEFAULT_MT;
+ d->arch.hvm_domain.vmx.ept_control.gaw = EPT_DEFAULT_GAW;
+ d->arch.hvm_domain.vmx.ept_control.asr =
+ pagetable_get_pfn(d->arch.phys_table);
+
+ if ( (rc = vmx_alloc_vpid(d)) != 0 )
+ return rc;
+
+ if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
+ {
+ vmx_free_vpid(d);
+ return rc;
+ }
+
+ return 0;
}
static void vmx_domain_destroy(struct domain *d)
{
+ ept_sync_domain(d);
vmx_free_vlapic_mapping(d);
+ vmx_free_vpid(d);
}
static int vmx_vcpu_initialise(struct vcpu *v)
@@ -492,20 +512,23 @@ static int vmx_restore_cr0_cr3(
unsigned long mfn = 0;
p2m_type_t p2mt;
- if ( cr0 & X86_CR0_PG )
+ if ( paging_mode_shadow(v->domain) )
{
- mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
- if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
+ if ( cr0 & X86_CR0_PG )
{
- gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
- return -EINVAL;
+ mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
+ {
+ gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
+ return -EINVAL;
+ }
}
- }
- if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
- put_page(pagetable_get_page(v->arch.guest_table));
+ if ( hvm_paging_enabled(v) )
+ put_page(pagetable_get_page(v->arch.guest_table));
- v->arch.guest_table = pagetable_from_pfn(mfn);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ }
v->arch.hvm_vcpu.guest_cr[0] = cr0 | X86_CR0_ET;
v->arch.hvm_vcpu.guest_cr[3] = cr3;
@@ -538,11 +561,6 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
vmx_update_guest_cr(v, 2);
vmx_update_guest_cr(v, 4);
-#ifdef HVM_DEBUG_SUSPEND
- printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__, c->cr3, c->cr0, c->cr4);
-#endif
-
v->arch.hvm_vcpu.guest_efer = c->msr_efer;
vmx_update_guest_efer(v);
@@ -573,20 +591,6 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
return 0;
}
-#if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
-static void dump_msr_state(struct vmx_msr_state *m)
-{
- int i = 0;
- printk("**** msr state ****\n");
- printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
- for ( i = 0; i < VMX_MSR_COUNT; i++ )
- printk("0x%lx,", m->msrs[i]);
- printk("\n");
-}
-#else
-#define dump_msr_state(m) ((void)0)
-#endif
-
static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
#ifdef __x86_64__
@@ -604,8 +608,6 @@ static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
#endif
data->tsc = hvm_get_guest_time(v);
-
- dump_msr_state(guest_state);
}
static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
@@ -624,8 +626,6 @@ static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
#endif
hvm_set_guest_time(v, data->tsc);
-
- dump_msr_state(guest_state);
}
@@ -900,6 +900,56 @@ static void vmx_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
__vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
}
+static void vmx_load_pdptrs(struct vcpu *v)
+{
+ unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3], mfn;
+ uint64_t *guest_pdptrs;
+ p2m_type_t p2mt;
+ char *p;
+
+ /* EPT needs to load PDPTRS into VMCS for PAE. */
+ if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
+ return;
+
+ if ( cr3 & 0x1fUL )
+ goto crash;
+
+ mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) )
+ goto crash;
+
+ p = map_domain_page(mfn);
+
+ guest_pdptrs = (uint64_t *)(p + (cr3 & ~PAGE_MASK));
+
+ /*
+ * We do not check the PDPTRs for validity. The CPU will do this during
+ * vm entry, and we can handle the failure there and crash the guest.
+ * The only thing we could do better here is #GP instead.
+ */
+
+ vmx_vmcs_enter(v);
+
+ __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]);
+ __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]);
+ __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]);
+ __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);
+#ifdef CONFIG_X86_PAE
+ __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32);
+ __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32);
+ __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32);
+ __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);
+#endif
+
+ vmx_vmcs_exit(v);
+
+ unmap_domain_page(p);
+ return;
+
+ crash:
+ domain_crash(v->domain);
+}
+
static void vmx_update_host_cr3(struct vcpu *v)
{
vmx_vmcs_enter(v);
@@ -915,7 +965,24 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
{
case 0: {
unsigned long hw_cr0_mask =
- X86_CR0_NE | X86_CR0_PG | X86_CR0_WP | X86_CR0_PE;
+ X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
+
+ if ( paging_mode_shadow(v->domain) )
+ hw_cr0_mask |= X86_CR0_WP;
+
+ if ( paging_mode_hap(v->domain) )
+ {
+ /* We manage GUEST_CR3 when guest CR0.PE is zero. */
+ uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
+ v->arch.hvm_vmx.exec_control &= ~cr3_ctls;
+ if ( !hvm_paging_enabled(v) )
+ v->arch.hvm_vmx.exec_control |= cr3_ctls;
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+ /* Changing CR0.PE can change some bits in real CR4. */
+ vmx_update_guest_cr(v, 4);
+ }
if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
@@ -939,11 +1006,27 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
/* CR2 is updated in exit stub. */
break;
case 3:
+ if ( paging_mode_hap(v->domain) )
+ {
+ if ( !hvm_paging_enabled(v) )
+ v->arch.hvm_vcpu.hw_cr[3] =
+ v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT];
+ vmx_load_pdptrs(v);
+ }
+
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
+ vpid_sync_vcpu_all(v);
break;
case 4:
- v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
+ v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
+ if ( paging_mode_hap(v->domain) )
+ v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+ v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
+ if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
+ {
+ v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
+ v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+ }
__vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
break;
@@ -978,12 +1061,29 @@ static void vmx_update_guest_efer(struct vcpu *v)
static void vmx_flush_guest_tlbs(void)
{
- /* No tagged TLB support on VMX yet. The fact that we're in Xen
- * at all means any guest will have a clean TLB when it's next run,
- * because VMRESUME will flush it for us. */
+ /*
+ * If VPID (i.e. tagged TLB support) is not enabled, the fact that
+ * we're in Xen at all means any guest will have a clean TLB when
+ * it's next run, because VMRESUME will flush it for us.
+ *
+ * If enabled, we invalidate all translations associated with all
+ * VPID values.
+ */
+ vpid_sync_all();
}
+static void __ept_sync_domain(void *info)
+{
+ struct domain *d = info;
+ __invept(1, d->arch.hvm_domain.vmx.ept_control.eptp, 0);
+}
+void ept_sync_domain(struct domain *d)
+{
+ /* Only if using EPT and this domain has some VCPUs to dirty. */
+ if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] )
+ on_each_cpu(__ept_sync_domain, d, 1, 1);
+}
static void __vmx_inject_exception(
struct vcpu *v, int trap, int type, int error_code)
@@ -1100,6 +1200,9 @@ static struct hvm_function_table vmx_function_table = {
.invlpg_intercept = vmx_invlpg_intercept
};
+static unsigned long *vpid_bitmap;
+#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)
+
void start_vmx(void)
{
static int bootstrapped;
@@ -1133,6 +1236,25 @@ void start_vmx(void)
return;
}
+ if ( cpu_has_vmx_ept )
+ {
+ printk("VMX: EPT is available.\n");
+ vmx_function_table.hap_supported = 1;
+ }
+
+ if ( cpu_has_vmx_vpid )
+ {
+ printk("VMX: VPID is available.\n");
+
+ vpid_bitmap = xmalloc_array(
+ unsigned long, BITS_TO_LONGS(VPID_BITMAP_SIZE));
+ BUG_ON(vpid_bitmap == NULL);
+ memset(vpid_bitmap, 0, BITS_TO_LONGS(VPID_BITMAP_SIZE) * sizeof(long));
+
+ /* VPID 0 is used by VMX root mode (the hypervisor). */
+ __set_bit(0, vpid_bitmap);
+ }
+
setup_vmcs_dump();
hvm_enable(&vmx_function_table);
@@ -1635,18 +1757,47 @@ static int vmx_alloc_vlapic_mapping(struct domain *d)
share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
set_mmio_p2m_entry(
d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(virt_to_mfn(apic_va)));
- d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
+ d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
return 0;
}
static void vmx_free_vlapic_mapping(struct domain *d)
{
- unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
+ unsigned long mfn = d->arch.hvm_domain.vmx.apic_access_mfn;
if ( mfn != 0 )
free_xenheap_page(mfn_to_virt(mfn));
}
+static int vmx_alloc_vpid(struct domain *d)
+{
+ int idx;
+
+ if ( !cpu_has_vmx_vpid )
+ return 0;
+
+ do {
+ idx = find_first_zero_bit(vpid_bitmap, VPID_BITMAP_SIZE);
+ if ( idx >= VPID_BITMAP_SIZE )
+ {
+ dprintk(XENLOG_WARNING, "VMX VPID space exhausted.\n");
+ return -EBUSY;
+ }
+ }
+ while ( test_and_set_bit(idx, vpid_bitmap) );
+
+ d->arch.hvm_domain.vmx.vpid_base = idx * MAX_VIRT_CPUS;
+ return 0;
+}
+
+static void vmx_free_vpid(struct domain *d)
+{
+ if ( !cpu_has_vmx_vpid )
+ return;
+
+ clear_bit(d->arch.hvm_domain.vmx.vpid_base / MAX_VIRT_CPUS, vpid_bitmap);
+}
+
static void vmx_install_vlapic_mapping(struct vcpu *v)
{
paddr_t virt_page_ma, apic_page_ma;
@@ -1655,7 +1806,7 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
return;
virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
- apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
+ apic_page_ma = v->domain->arch.hvm_domain.vmx.apic_access_mfn;
apic_page_ma <<= PAGE_SHIFT;
vmx_vmcs_enter(v);
@@ -1900,6 +2051,51 @@ static void vmx_wbinvd_intercept(void)
wbinvd();
}
+static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
+{
+ unsigned long gla_validity = qualification & EPT_GLA_VALIDITY_MASK;
+ struct domain *d = current->domain;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ mfn_t mfn;
+ p2m_type_t t;
+
+ if ( unlikely(qualification & EPT_GAW_VIOLATION) )
+ {
+ gdprintk(XENLOG_ERR, "EPT violation: guest physical address %"PRIpaddr
+ " exceeded its width limit.\n", gpa);
+ goto crash;
+ }
+
+ if ( unlikely(gla_validity == EPT_GLA_VALIDITY_RSVD) ||
+ unlikely(gla_validity == EPT_GLA_VALIDITY_PDPTR_LOAD) )
+ {
+ gdprintk(XENLOG_ERR, "EPT violation: reserved bit or "
+ "pdptr load violation.\n");
+ goto crash;
+ }
+
+ mfn = gfn_to_mfn(d, gfn, &t);
+ if ( p2m_is_ram(t) && paging_mode_log_dirty(d) )
+ {
+ paging_mark_dirty(d, mfn_x(mfn));
+ p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ return;
+ }
+
+ /* This can only happen in log-dirty mode, writing back A/D bits. */
+ if ( unlikely(gla_validity == EPT_GLA_VALIDITY_GPT_WALK) )
+ goto crash;
+
+ ASSERT(gla_validity == EPT_GLA_VALIDITY_MATCH);
+ handle_mmio();
+
+ return;
+
+ crash:
+ domain_crash(d);
+}
+
static void vmx_failed_vmentry(unsigned int exit_reason,
struct cpu_user_regs *regs)
{
@@ -1939,6 +2135,10 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
unsigned long exit_qualification, inst_len = 0;
struct vcpu *v = current;
+ if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
+ v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
+ __vmread(GUEST_CR3);
+
exit_reason = __vmread(VM_EXIT_REASON);
hvmtrace_vmexit(v, regs->eip, exit_reason);
@@ -2171,6 +2371,17 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
break;
}
+ case EXIT_REASON_EPT_VIOLATION:
+ {
+ paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+#ifdef CONFIG_X86_PAE
+ gpa |= (paddr_t)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
+#endif
+ exit_qualification = __vmread(EXIT_QUALIFICATION);
+ ept_handle_violation(exit_qualification, gpa);
+ break;
+ }
+
default:
exit_and_crash:
gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
diff --git a/xen/arch/x86/hvm/vmx/x86_32/exits.S b/xen/arch/x86/hvm/vmx/x86_32/exits.S
index 11db8cfc21..eff089a112 100644
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S
@@ -129,7 +129,6 @@ ENTRY(vmx_asm_do_vmentry)
/*vmx_resume:*/
HVM_RESTORE_ALL_NOSEGREGS
VMRESUME
- pushf
call vm_resume_fail
ud2
@@ -137,7 +136,6 @@ vmx_launch:
movb $1,VCPU_vmx_launched(%ebx)
HVM_RESTORE_ALL_NOSEGREGS
VMLAUNCH
- pushf
call vm_launch_fail
ud2
diff --git a/xen/arch/x86/hvm/vmx/x86_64/exits.S b/xen/arch/x86/hvm/vmx/x86_64/exits.S
index 48da4869bd..56fdb8ad54 100644
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S
@@ -148,7 +148,6 @@ ENTRY(vmx_asm_do_vmentry)
/*vmx_resume:*/
HVM_RESTORE_ALL_NOSEGREGS
VMRESUME
- pushfq
call vm_resume_fail
ud2
@@ -156,7 +155,6 @@ vmx_launch:
movb $1,VCPU_vmx_launched(%rbx)
HVM_RESTORE_ALL_NOSEGREGS
VMLAUNCH
- pushfq
call vm_launch_fail
ud2
diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c
index ce3943eaab..a3d6f2d9ca 100644
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -319,7 +319,7 @@ static int vpic_intercept_pic_io(
if ( bytes != 1 )
{
gdprintk(XENLOG_WARNING, "PIC_IO bad access size %d\n", bytes);
- return 1;
+ return X86EMUL_OKAY;
}
vpic = &current->domain->arch.hvm_domain.vpic[port >> 7];
@@ -329,7 +329,7 @@ static int vpic_intercept_pic_io(
else
*val = (uint8_t)vpic_ioport_read(vpic, port);
- return 1;
+ return X86EMUL_OKAY;
}
static int vpic_intercept_elcr_io(
@@ -338,11 +338,7 @@ static int vpic_intercept_elcr_io(
struct hvm_hw_vpic *vpic;
uint32_t data;
- if ( bytes != 1 )
- {
- gdprintk(XENLOG_WARNING, "PIC_IO bad access size %d\n", bytes);
- return 1;
- }
+ BUG_ON(bytes != 1);
vpic = &current->domain->arch.hvm_domain.vpic[port & 1];
@@ -360,34 +356,8 @@ static int vpic_intercept_elcr_io(
*val = vpic->elcr & vpic_elcr_mask(vpic);
}
- return 1;
-}
-
-#ifdef HVM_DEBUG_SUSPEND
-static void vpic_info(struct hvm_hw_vpic *s)
-{
- printk("*****pic state:*****\n");
- printk("pic 0x%x.\n", s->irr);
- printk("pic 0x%x.\n", s->imr);
- printk("pic 0x%x.\n", s->isr);
- printk("pic 0x%x.\n", s->irq_base);
- printk("pic 0x%x.\n", s->init_state);
- printk("pic 0x%x.\n", s->priority_add);
- printk("pic 0x%x.\n", s->readsel_isr);
- printk("pic 0x%x.\n", s->poll);
- printk("pic 0x%x.\n", s->auto_eoi);
- printk("pic 0x%x.\n", s->rotate_on_auto_eoi);
- printk("pic 0x%x.\n", s->special_fully_nested_mode);
- printk("pic 0x%x.\n", s->special_mask_mode);
- printk("pic 0x%x.\n", s->elcr);
- printk("pic 0x%x.\n", s->int_output);
- printk("pic 0x%x.\n", s->is_master);
-}
-#else
-static void vpic_info(struct hvm_hw_vpic *s)
-{
+ return X86EMUL_OKAY;
}
-#endif
static int vpic_save(struct domain *d, hvm_domain_context_t *h)
{
@@ -398,7 +368,6 @@ static int vpic_save(struct domain *d, hvm_domain_context_t *h)
for ( i = 0; i < 2 ; i++ )
{
s = &d->arch.hvm_domain.vpic[i];
- vpic_info(s);
if ( hvm_save_entry(PIC, i, h, s) )
return 1;
}
@@ -421,7 +390,6 @@ static int vpic_load(struct domain *d, hvm_domain_context_t *h)
if ( hvm_load_entry(PIC, h, s) != 0 )
return -EINVAL;
- vpic_info(s);
return 0;
}
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 9ccbefd22a..b7e50ae8f1 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1244,7 +1244,11 @@ static void __init setup_ioapic_ids_from_mpc(void) { }
*/
static int __init timer_irq_works(void)
{
- unsigned long t1 = jiffies;
+ extern unsigned long pit0_ticks;
+ unsigned long t1;
+
+ t1 = pit0_ticks;
+ mb();
local_irq_enable();
/* Let ten ticks pass... */
@@ -1257,7 +1261,8 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ mb();
+ if (pit0_ticks - t1 > 4)
return 1;
return 0;
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index a1220af3b3..15f2cf57eb 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -299,7 +299,7 @@ int memory_is_conventional_ram(paddr_t p)
unsigned long domain_get_maximum_gpfn(struct domain *d)
{
if ( is_hvm_domain(d) )
- return d->arch.p2m.max_mapped_pfn;
+ return d->arch.p2m->max_mapped_pfn;
/* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
return arch_get_max_pfn(d) - 1;
}
@@ -476,7 +476,7 @@ static void invalidate_shadow_ldt(struct vcpu *v)
if ( pfn == 0 ) continue;
l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
page = mfn_to_page(pfn);
- ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
+ ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
ASSERT_PAGE_IS_DOMAIN(page, v->domain);
put_page_and_type(page);
}
@@ -530,7 +530,7 @@ int map_ldt_shadow_page(unsigned int off)
if ( unlikely(!mfn_valid(mfn)) )
return 0;
- okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
+ okay = get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page);
if ( unlikely(!okay) )
return 0;
@@ -924,7 +924,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
{
/* We expect this is rare so we blow the entire shadow LDT. */
if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
- PGT_ldt_page)) &&
+ PGT_seg_desc_page)) &&
unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
(d == e) )
{
@@ -1748,8 +1748,7 @@ static int alloc_page_type(struct page_info *page, unsigned long type)
return alloc_l3_table(page);
case PGT_l4_page_table:
return alloc_l4_table(page);
- case PGT_gdt_page:
- case PGT_ldt_page:
+ case PGT_seg_desc_page:
return alloc_segdesc_page(page);
default:
printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
@@ -2189,7 +2188,7 @@ int do_mmuext_op(
goto out;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
for ( i = 0; i < count; i++ )
{
@@ -2438,7 +2437,7 @@ int do_mmuext_op(
process_deferred_ops();
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
perfc_add(num_mmuext_ops, i);
@@ -2493,7 +2492,7 @@ int do_mmu_update(
domain_mmap_cache_init(&mapcache);
- LOCK_BIGLOCK(d);
+ domain_lock(d);
for ( i = 0; i < count; i++ )
{
@@ -2665,7 +2664,7 @@ int do_mmu_update(
process_deferred_ops();
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
domain_mmap_cache_destroy(&mapcache);
@@ -2694,7 +2693,7 @@ static int create_grant_pte_mapping(
l1_pgentry_t ol1e;
struct domain *d = v->domain;
- ASSERT(spin_is_locked(&d->big_lock));
+ ASSERT(domain_is_locked(d));
adjust_guest_l1e(nl1e, d);
@@ -2817,7 +2816,7 @@ static int create_grant_va_mapping(
unsigned long gl1mfn;
int okay;
- ASSERT(spin_is_locked(&d->big_lock));
+ ASSERT(domain_is_locked(d));
adjust_guest_l1e(nl1e, d);
@@ -3015,7 +3014,7 @@ int do_update_va_mapping(unsigned long va, u64 val64,
if ( rc )
return rc;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
pl1e = guest_map_l1e(v, va, &gl1mfn);
@@ -3028,7 +3027,7 @@ int do_update_va_mapping(unsigned long va, u64 val64,
process_deferred_ops();
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
@@ -3134,7 +3133,7 @@ long set_gdt(struct vcpu *v,
{
mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
if ( !mfn_valid(mfn) ||
- !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
+ !get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page) )
goto fail;
}
@@ -3173,12 +3172,12 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
if ( copy_from_guest(frames, frame_list, nr_pages) )
return -EFAULT;
- LOCK_BIGLOCK(curr->domain);
+ domain_lock(curr->domain);
if ( (ret = set_gdt(curr, frames, entries)) == 0 )
flush_tlb_local();
- UNLOCK_BIGLOCK(curr->domain);
+ domain_unlock(curr->domain);
return ret;
}
@@ -3211,12 +3210,8 @@ long do_update_descriptor(u64 pa, u64 desc)
/* Check if the given frame is in use in an unsafe context. */
switch ( page->u.inuse.type_info & PGT_type_mask )
{
- case PGT_gdt_page:
- if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
- goto out;
- break;
- case PGT_ldt_page:
- if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
+ case PGT_seg_desc_page:
+ if ( unlikely(!get_page_type(page, PGT_seg_desc_page)) )
goto out;
break;
default:
@@ -3316,7 +3311,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
return -EINVAL;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
/* Remove previously mapped page if it was present. */
prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
@@ -3338,7 +3333,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
/* Map at new location. */
guest_physmap_add_page(d, xatp.gpfn, mfn);
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
rcu_unlock_domain(d);
@@ -3674,7 +3669,7 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
struct ptwr_emulate_ctxt ptwr_ctxt;
int rc;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
/* Attempt to read the PTE that maps the VA being accessed. */
guest_get_eff_l1e(v, addr, &pte);
@@ -3699,12 +3694,12 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
if ( rc == X86EMUL_UNHANDLEABLE )
goto bail;
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
perfc_incr(ptwr_emulations);
return EXCRET_fault_fixed;
bail:
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
return 0;
}
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
index 160e5f36bf..64cb72786e 100644
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -2,6 +2,7 @@ obj-y += hap.o
obj-y += guest_walk_2level.o
obj-y += guest_walk_3level.o
obj-y += guest_walk_4level.o
+obj-y += p2m-ept.o
guest_levels = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 15cdc23c96..e30acf6948 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -38,6 +38,7 @@
#include <asm/hap.h>
#include <asm/paging.h>
#include <asm/domain.h>
+#include <xen/numa.h>
#include "private.h"
@@ -61,7 +62,7 @@ int hap_enable_log_dirty(struct domain *d)
hap_unlock(d);
/* set l1e entries of P2M table to be read-only. */
- p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
+ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
return 0;
}
@@ -73,14 +74,14 @@ int hap_disable_log_dirty(struct domain *d)
hap_unlock(d);
/* set l1e entries of P2M table with normal mode */
- p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
+ p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
return 0;
}
void hap_clean_dirty_bitmap(struct domain *d)
{
/* set l1e entries of P2M table to be read-only. */
- p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
+ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
}
@@ -135,7 +136,8 @@ static struct page_info *hap_alloc_p2m_page(struct domain *d)
&& mfn_x(page_to_mfn(pg)) >= (1UL << (32 - PAGE_SHIFT)) )
{
free_domheap_page(pg);
- pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32));
+ pg = alloc_domheap_page(
+ NULL, MEMF_bits(32) | MEMF_node(domain_to_node(d)));
if ( likely(pg != NULL) )
{
void *p = hap_map_domain_page(page_to_mfn(pg));
@@ -199,7 +201,7 @@ hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
if ( d->arch.paging.hap.total_pages < pages )
{
/* Need to allocate more memory from domheap */
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
if ( pg == NULL )
{
HAP_PRINTK("failed to allocate hap pages.\n");
diff --git a/xen/arch/x86/mm/hap/p2m-ept.c b/xen/arch/x86/mm/hap/p2m-ept.c
new file mode 100644
index 0000000000..697ca4d697
--- /dev/null
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -0,0 +1,257 @@
+/*
+ * p2m-ept.c: use the EPT page table as p2m
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/types.h>
+#include <asm/domain.h>
+#include <asm/p2m.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <xen/iommu.h>
+
+static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type)
+{
+ switch ( type )
+ {
+ case p2m_invalid:
+ case p2m_mmio_dm:
+ default:
+ return;
+ case p2m_ram_rw:
+ case p2m_mmio_direct:
+ entry->r = entry->w = entry->x = 1;
+ return;
+ case p2m_ram_logdirty:
+ case p2m_ram_ro:
+ entry->r = entry->x = 1;
+ entry->w = 0;
+ return;
+ }
+}
+
+static int ept_next_level(struct domain *d, bool_t read_only,
+ ept_entry_t **table, unsigned long *gfn_remainder,
+ u32 shift)
+{
+ ept_entry_t *ept_entry, *next;
+ u32 index;
+
+ index = *gfn_remainder >> shift;
+ *gfn_remainder &= (1UL << shift) - 1;
+
+ ept_entry = (*table) + index;
+
+ if ( !(ept_entry->epte & 0x7) )
+ {
+ struct page_info *pg;
+
+ if ( read_only )
+ return 0;
+
+ pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+
+ pg->count_info = 1;
+ pg->u.inuse.type_info = 1 | PGT_validated;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+
+ ept_entry->emt = 0;
+ ept_entry->sp_avail = 0;
+ ept_entry->avail1 = 0;
+ ept_entry->mfn = page_to_mfn(pg);
+ ept_entry->rsvd = 0;
+ ept_entry->avail2 = 0;
+ /* last step */
+ ept_entry->r = ept_entry->w = ept_entry->x = 1;
+ }
+
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+
+ return 1;
+}
+
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+{
+ ept_entry_t *table =
+ map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+ unsigned long gfn_remainder = gfn;
+ ept_entry_t *ept_entry = NULL;
+ u32 index;
+ int i, rv = 0;
+
+ /* Should check if gfn obeys GAW here */
+
+ for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+ if ( !ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER) )
+ goto out;
+
+ index = gfn_remainder;
+ ept_entry = table + index;
+
+ if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
+ {
+ /* Track the highest gfn for which we have ever had a valid mapping */
+ if ( gfn > d->arch.p2m->max_mapped_pfn )
+ d->arch.p2m->max_mapped_pfn = gfn;
+
+ ept_entry->emt = EPT_DEFAULT_MT;
+ ept_entry->sp_avail = 0;
+ ept_entry->avail1 = p2mt;
+ ept_entry->mfn = mfn_x(mfn);
+ ept_entry->rsvd = 0;
+ ept_entry->avail2 = 0;
+ /* last step */
+ ept_entry->r = ept_entry->w = ept_entry->x = 1;
+ ept_p2m_type_to_flags(ept_entry, p2mt);
+ }
+ else
+ ept_entry->epte = 0;
+
+ /* Success */
+ rv = 1;
+
+ out:
+ unmap_domain_page(table);
+
+ ept_sync_domain(d);
+
+ /* If p2m table is shared with vtd page-table. */
+ if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
+ iommu_flush(d, gfn, (u64*)ept_entry);
+
+ return rv;
+}
+
+/* Read ept p2m entries */
+static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+ ept_entry_t *table =
+ map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+ unsigned long gfn_remainder = gfn;
+ ept_entry_t *ept_entry;
+ u32 index;
+ int i;
+ mfn_t mfn = _mfn(INVALID_MFN);
+
+ *t = p2m_mmio_dm;
+
+ /* This pfn is higher than the highest the p2m map currently holds */
+ if ( gfn > d->arch.p2m->max_mapped_pfn )
+ goto out;
+
+ /* Should check if gfn obeys GAW here. */
+
+ for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+ if ( !ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER) )
+ goto out;
+
+ index = gfn_remainder;
+ ept_entry = table + index;
+
+ if ( ept_entry->avail1 != p2m_invalid )
+ {
+ *t = ept_entry->avail1;
+ mfn = _mfn(ept_entry->mfn);
+ }
+
+ out:
+ unmap_domain_page(table);
+ return mfn;
+}
+
+static mfn_t ept_get_entry_current(unsigned long gfn, p2m_type_t *t)
+{
+ return ept_get_entry(current->domain, gfn, t);
+}
+
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type. This is used in hardware-assisted paging to
+ * quickly enable or disable log-dirty tracking */
+
+static void ept_change_entry_type_global(struct domain *d,
+ p2m_type_t ot, p2m_type_t nt)
+{
+ ept_entry_t *l4e, *l3e, *l2e, *l1e;
+ int i4, i3, i2, i1;
+
+ if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
+ return;
+
+ BUG_ON(EPT_DEFAULT_GAW != 3);
+
+ l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+ for ( i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
+ {
+ if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ continue;
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ continue;
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte || l2e[i2].sp_avail )
+ continue;
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ unmap_domain_page(l2e);
+ }
+ unmap_domain_page(l3e);
+ }
+ unmap_domain_page(l4e);
+
+ ept_sync_domain(d);
+}
+
+void ept_p2m_init(struct domain *d)
+{
+ d->arch.p2m->set_entry = ept_set_entry;
+ d->arch.p2m->get_entry = ept_get_entry;
+ d->arch.p2m->get_entry_current = ept_get_entry_current;
+ d->arch.p2m->change_entry_type_global = ept_change_entry_type_global;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
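
ept_next_level() above peels one table level off the guest frame number per call by shifting and masking gfn_remainder. The following self-contained sketch shows just that index arithmetic, assuming the conventional 9-bit table order and the 4-level walk implied by EPT_DEFAULT_GAW == 3; it touches no page tables.

    #include <stdio.h>

    #define TABLE_ORDER 9      /* assumed value of EPT_TABLE_ORDER */
    #define LEVELS      4      /* one walk step per level, top-down */

    int main(void)
    {
        unsigned long gfn_remainder = 0x12345678UL;   /* example gfn */

        for (int level = LEVELS - 1; level >= 0; level--)
        {
            unsigned int shift = level * TABLE_ORDER;
            unsigned long index = gfn_remainder >> shift;

            gfn_remainder &= (1UL << shift) - 1;  /* keep bits below this level */
            printf("level %d: index %lu\n", level, index);
        }
        return 0;
    }
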
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index e8298fb3bd..faee13955e 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -27,6 +27,7 @@
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
+#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <xen/iommu.h>
/* Debugging and auditing of the P2M code? */
@@ -41,36 +42,37 @@
* Locking discipline: always acquire this lock before the shadow or HAP one
*/
-#define p2m_lock_init(_d) \
- do { \
- spin_lock_init(&(_d)->arch.p2m.lock); \
- (_d)->arch.p2m.locker = -1; \
- (_d)->arch.p2m.locker_function = "nobody"; \
+#define p2m_lock_init(_p2m) \
+ do { \
+ spin_lock_init(&(_p2m)->lock); \
+ (_p2m)->locker = -1; \
+ (_p2m)->locker_function = "nobody"; \
} while (0)
-#define p2m_lock(_d) \
- do { \
- if ( unlikely((_d)->arch.p2m.locker == current->processor) )\
- { \
- printk("Error: p2m lock held by %s\n", \
- (_d)->arch.p2m.locker_function); \
- BUG(); \
- } \
- spin_lock(&(_d)->arch.p2m.lock); \
- ASSERT((_d)->arch.p2m.locker == -1); \
- (_d)->arch.p2m.locker = current->processor; \
- (_d)->arch.p2m.locker_function = __func__; \
+#define p2m_lock(_p2m) \
+ do { \
+ if ( unlikely((_p2m)->locker == current->processor) ) \
+ { \
+ printk("Error: p2m lock held by %s\n", \
+ (_p2m)->locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_p2m)->lock); \
+ ASSERT((_p2m)->locker == -1); \
+ (_p2m)->locker = current->processor; \
+ (_p2m)->locker_function = __func__; \
} while (0)
-#define p2m_unlock(_d) \
- do { \
- ASSERT((_d)->arch.p2m.locker == current->processor); \
- (_d)->arch.p2m.locker = -1; \
- (_d)->arch.p2m.locker_function = "nobody"; \
- spin_unlock(&(_d)->arch.p2m.lock); \
+#define p2m_unlock(_p2m) \
+ do { \
+ ASSERT((_p2m)->locker == current->processor); \
+ (_p2m)->locker = -1; \
+ (_p2m)->locker_function = "nobody"; \
+ spin_unlock(&(_p2m)->lock); \
} while (0)
-
+#define p2m_locked_by_me(_p2m) \
+ (current->processor == (_p2m)->locker)
/* Printouts */
#define P2M_PRINTK(_f, _a...) \
@@ -152,7 +154,7 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
- ASSERT(d->arch.p2m.alloc_page);
+ ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
shift, max)) )
@@ -160,10 +162,10 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
{
- struct page_info *pg = d->arch.p2m.alloc_page(d);
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
if ( pg == NULL )
return 0;
- list_add_tail(&pg->list, &d->arch.p2m.pages);
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
pg->u.inuse.type_info = type | 1 | PGT_validated;
pg->count_info = 1;
@@ -202,7 +204,7 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
// Returns 0 on error (out of memory)
static int
-set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -244,8 +246,8 @@ set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
ASSERT(p2m_entry);
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
- d->arch.p2m.max_mapped_pfn = gfn;
+ if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
+ d->arch.p2m->max_mapped_pfn = gfn;
if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
@@ -279,14 +281,170 @@ set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
return rv;
}
+static mfn_t
+p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+ mfn_t mfn;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ ASSERT(paging_mode_translate(d));
+
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+ *t = p2m_mmio_dm;
+
+ mfn = pagetable_get_mfn(d->arch.phys_table);
+
+ if ( gfn > d->arch.p2m->max_mapped_pfn )
+ /* This pfn is higher than the highest the p2m map currently holds */
+ return _mfn(INVALID_MFN);
+
+#if CONFIG_PAGING_LEVELS >= 4
+ {
+ l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
+ l4e += l4_table_offset(addr);
+ if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l4e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l4e_get_pfn(*l4e));
+ unmap_domain_page(l4e);
+ }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+ {
+ l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
+#if CONFIG_PAGING_LEVELS == 3
+ /* On PAE hosts the p2m has eight l3 entries, not four (see
+ * shadow_set_p2m_entry()) so we can't use l3_table_offset.
+ * Instead, just count the number of l3es from zero. It's safe
+ * to do this because we already checked that the gfn is within
+ * the bounds of the p2m. */
+ l3e += (addr >> L3_PAGETABLE_SHIFT);
+#else
+ l3e += l3_table_offset(addr);
+#endif
+ if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l3e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l3e_get_pfn(*l3e));
+ unmap_domain_page(l3e);
+ }
+#endif
+
+ l2e = map_domain_page(mfn_x(mfn));
+ l2e += l2_table_offset(addr);
+ if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l2e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l2e_get_pfn(*l2e));
+ unmap_domain_page(l2e);
+
+ l1e = map_domain_page(mfn_x(mfn));
+ l1e += l1_table_offset(addr);
+ if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l1e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l1e_get_pfn(*l1e));
+ *t = p2m_flags_to_type(l1e_get_flags(*l1e));
+ unmap_domain_page(l1e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+ mfn_t mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt = p2m_mmio_dm;
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+
+ if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
+ {
+ l1_pgentry_t l1e = l1e_empty();
+ int ret;
+
+ ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
+ / sizeof(l1_pgentry_t));
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
+ }
+
+ *t = p2mt;
+ return mfn;
+}
/* Init the datastructures for later use by the p2m code */
-void p2m_init(struct domain *d)
+int p2m_init(struct domain *d)
{
- p2m_lock_init(d);
- INIT_LIST_HEAD(&d->arch.p2m.pages);
+ struct p2m_domain *p2m;
+
+ p2m = xmalloc(struct p2m_domain);
+ if ( p2m == NULL )
+ return -ENOMEM;
+
+ d->arch.p2m = p2m;
+
+ memset(p2m, 0, sizeof(*p2m));
+ p2m_lock_init(p2m);
+ INIT_LIST_HEAD(&p2m->pages);
+
+ p2m->set_entry = p2m_set_entry;
+ p2m->get_entry = p2m_gfn_to_mfn;
+ p2m->get_entry_current = p2m_gfn_to_mfn_current;
+ p2m->change_entry_type_global = p2m_change_type_global;
+
+ if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
+ ept_p2m_init(d);
+
+ return 0;
}
+void p2m_change_entry_type_global(struct domain *d,
+ p2m_type_t ot, p2m_type_t nt)
+{
+ struct p2m_domain *p2m = d->arch.p2m;
+
+ p2m_lock(p2m);
+ p2m->change_entry_type_global(d, ot, nt);
+ p2m_unlock(p2m);
+}
+
+static inline
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+}
// Allocate a new p2m table for a domain.
//
@@ -308,28 +466,29 @@ int p2m_alloc_table(struct domain *d,
struct page_info *page, *p2m_top;
unsigned int page_count = 0;
unsigned long gfn = -1UL;
+ struct p2m_domain *p2m = d->arch.p2m;
- p2m_lock(d);
+ p2m_lock(p2m);
if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
{
P2M_ERROR("p2m already allocated for this domain\n");
- p2m_unlock(d);
+ p2m_unlock(p2m);
return -EINVAL;
}
P2M_PRINTK("allocating p2m table\n");
- d->arch.p2m.alloc_page = alloc_page;
- d->arch.p2m.free_page = free_page;
+ p2m->alloc_page = alloc_page;
+ p2m->free_page = free_page;
- p2m_top = d->arch.p2m.alloc_page(d);
+ p2m_top = p2m->alloc_page(d);
if ( p2m_top == NULL )
{
- p2m_unlock(d);
+ p2m_unlock(p2m);
return -ENOMEM;
}
- list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
+ list_add_tail(&p2m_top->list, &p2m->pages);
p2m_top->count_info = 1;
p2m_top->u.inuse.type_info =
@@ -376,13 +535,13 @@ int p2m_alloc_table(struct domain *d,
#endif
P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
- p2m_unlock(d);
+ p2m_unlock(p2m);
return 0;
error:
P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
PRI_mfn "\n", gfn, mfn_x(mfn));
- p2m_unlock(d);
+ p2m_unlock(p2m);
return -ENOMEM;
}
@@ -392,101 +551,24 @@ void p2m_teardown(struct domain *d)
{
struct list_head *entry, *n;
struct page_info *pg;
+ struct p2m_domain *p2m = d->arch.p2m;
- p2m_lock(d);
+ p2m_lock(p2m);
d->arch.phys_table = pagetable_null();
- list_for_each_safe(entry, n, &d->arch.p2m.pages)
+ list_for_each_safe(entry, n, &p2m->pages)
{
pg = list_entry(entry, struct page_info, list);
list_del(entry);
- d->arch.p2m.free_page(d, pg);
+ p2m->free_page(d, pg);
}
- p2m_unlock(d);
+ p2m_unlock(p2m);
}
-mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
-/* Read another domain's p2m entries */
+void p2m_final_teardown(struct domain *d)
{
- mfn_t mfn;
- paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
- l2_pgentry_t *l2e;
- l1_pgentry_t *l1e;
-
- ASSERT(paging_mode_translate(d));
-
- /* XXX This is for compatibility with the old model, where anything not
- * XXX marked as RAM was considered to be emulated MMIO space.
- * XXX Once we start explicitly registering MMIO regions in the p2m
- * XXX we will return p2m_invalid for unmapped gfns */
- *t = p2m_mmio_dm;
-
- mfn = pagetable_get_mfn(d->arch.phys_table);
-
- if ( gfn > d->arch.p2m.max_mapped_pfn )
- /* This pfn is higher than the highest the p2m map currently holds */
- return _mfn(INVALID_MFN);
-
-#if CONFIG_PAGING_LEVELS >= 4
- {
- l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
- l4e += l4_table_offset(addr);
- if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l4e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l4e_get_pfn(*l4e));
- unmap_domain_page(l4e);
- }
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- {
- l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
-#if CONFIG_PAGING_LEVELS == 3
- /* On PAE hosts the p2m has eight l3 entries, not four (see
- * shadow_set_p2m_entry()) so we can't use l3_table_offset.
- * Instead, just count the number of l3es from zero. It's safe
- * to do this because we already checked that the gfn is within
- * the bounds of the p2m. */
- l3e += (addr >> L3_PAGETABLE_SHIFT);
-#else
- l3e += l3_table_offset(addr);
-#endif
- if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l3e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l3e_get_pfn(*l3e));
- unmap_domain_page(l3e);
- }
-#endif
-
- l2e = map_domain_page(mfn_x(mfn));
- l2e += l2_table_offset(addr);
- if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l2e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l2e_get_pfn(*l2e));
- unmap_domain_page(l2e);
-
- l1e = map_domain_page(mfn_x(mfn));
- l1e += l1_table_offset(addr);
- if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l1e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l1e_get_pfn(*l1e));
- *t = p2m_flags_to_type(l1e_get_flags(*l1e));
- unmap_domain_page(l1e);
-
- ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
- return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ xfree(d->arch.p2m);
+ d->arch.p2m = NULL;
}
#if P2M_AUDIT
@@ -564,7 +646,7 @@ static void audit_p2m(struct domain *d)
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
- if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) )
+ if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
{
lp2mfn = mfn_x(gfn_to_mfn_current(gfn, &type));
if ( lp2mfn != mfn_x(p2mfn) )
@@ -695,11 +777,11 @@ void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
unsigned long mfn)
{
- p2m_lock(d);
+ p2m_lock(d->arch.p2m);
audit_p2m(d);
p2m_remove_page(d, gfn, mfn);
audit_p2m(d);
- p2m_unlock(d);
+ p2m_unlock(d->arch.p2m);
}
int
@@ -722,7 +804,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
*/
if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
{
- if ( !test_and_set_bool(d->arch.hvm_domain.amd_npt_4gb_warning) )
+ if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
" 4GB: specify 'hap=0' domain config option.\n",
d->domain_id);
@@ -730,7 +812,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
}
#endif
- p2m_lock(d);
+ p2m_lock(d->arch.p2m);
audit_p2m(d);
P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
@@ -781,7 +863,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
}
audit_p2m(d);
- p2m_unlock(d);
+ p2m_unlock(d->arch.p2m);
return rc;
}
@@ -812,7 +894,7 @@ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
return;
- p2m_lock(d);
+ ASSERT(p2m_locked_by_me(d->arch.p2m));
#if CONFIG_PAGING_LEVELS == 4
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
@@ -860,7 +942,7 @@ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
mfn = l1e_get_pfn(l1e[i1]);
gfn = get_gpfn_from_mfn(mfn);
/* create a new 1le entry with the new type */
- flags = p2m_flags_to_type(nt);
+ flags = p2m_type_to_flags(nt);
l1e_content = l1e_from_pfn(mfn, flags);
paging_write_p2m_entry(d, gfn, &l1e[i1],
l1mfn, l1e_content, 1);
@@ -884,7 +966,6 @@ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
unmap_domain_page(l2e);
#endif
- p2m_unlock(d);
}
/* Modify the p2m type of a single gfn from ot to nt, returning the
@@ -895,13 +976,13 @@ p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
p2m_type_t pt;
mfn_t mfn;
- p2m_lock(d);
+ p2m_lock(d->arch.p2m);
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
set_p2m_entry(d, gfn, mfn, nt);
- p2m_unlock(d);
+ p2m_unlock(d->arch.p2m);
return pt;
}
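
p2m_init() above turns the p2m into a small vtable: set_entry, get_entry and change_entry_type_global point at the page-table implementations by default, and ept_p2m_init() simply overrides them on EPT-capable hardware. A minimal sketch of that dispatch pattern, with invented names, follows.

    #include <stdio.h>

    struct p2m_ops {
        int (*set_entry)(unsigned long gfn, unsigned long mfn);
    };

    static int soft_set_entry(unsigned long gfn, unsigned long mfn)
    {
        printf("software p2m: gfn %#lx -> mfn %#lx\n", gfn, mfn);
        return 1;
    }

    static int ept_set_entry_sketch(unsigned long gfn, unsigned long mfn)
    {
        printf("EPT p2m: gfn %#lx -> mfn %#lx\n", gfn, mfn);
        return 1;
    }

    int main(void)
    {
        int hap_with_ept = 1;  /* stand-in for the hap_enabled/Intel check */
        struct p2m_ops p2m = { .set_entry = soft_set_entry };

        if (hap_with_ept)
            p2m.set_entry = ept_set_entry_sketch;  /* what ept_p2m_init() does */

        return !p2m.set_entry(0x1000, 0x2000);     /* 0 on success */
    }
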
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index e6c3cbb9e6..2247d8dd68 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -26,6 +26,7 @@
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/guest_access.h>
+#include <xen/numa.h>
#include <xsm/xsm.h>
#define hap_enabled(d) (is_hvm_domain(d) && (d)->arch.hvm_domain.hap_enabled)
@@ -99,8 +100,9 @@
static mfn_t paging_new_log_dirty_page(struct domain *d, void **mapping_p)
{
mfn_t mfn;
- struct page_info *page = alloc_domheap_page(NULL);
+ struct page_info *page;
+ page = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
if ( unlikely(page == NULL) )
{
d->arch.paging.log_dirty.failed_allocs++;
@@ -482,9 +484,12 @@ void paging_log_dirty_teardown(struct domain*d)
/* CODE FOR PAGING SUPPORT */
/************************************************/
/* Domain paging struct initialization. */
-void paging_domain_init(struct domain *d)
+int paging_domain_init(struct domain *d)
{
- p2m_init(d);
+ int rc;
+
+ if ( (rc = p2m_init(d)) != 0 )
+ return rc;
/* The order of the *_init calls below is important, as the later
* ones may rewrite some common fields. Shadow pagetables are the
@@ -494,6 +499,8 @@ void paging_domain_init(struct domain *d)
/* ... but we will use hardware assistance if it's available. */
if ( hap_enabled(d) )
hap_domain_init(d);
+
+ return 0;
}
/* vcpu paging struct initialization goes here */
@@ -587,6 +594,8 @@ void paging_final_teardown(struct domain *d)
hap_final_teardown(d);
else
shadow_final_teardown(d);
+
+ p2m_final_teardown(d);
}
/* Enable an arbitrary paging-assistance mode. Call once at domain
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index e4a04bb456..d7239cde77 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shadow.h>
+#include <xen/numa.h>
#include "private.h"
@@ -1249,7 +1250,7 @@ static unsigned int sh_set_allocation(struct domain *d,
{
/* Need to allocate more memory from domheap */
sp = (struct shadow_page_info *)
- alloc_domheap_pages(NULL, order, 0);
+ alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
if ( sp == NULL )
{
SHADOW_PRINTK("failed to allocate shadow pages.\n");
@@ -2171,13 +2172,12 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
#undef DO_UNSHADOW
/* If that didn't catch the shadows, something is wrong */
- if ( !fast && (pg->count_info & PGC_page_table) )
+ if ( !fast && all && (pg->count_info & PGC_page_table) )
{
SHADOW_ERROR("can't find all shadows of mfn %05lx "
"(shadow_flags=%08lx)\n",
mfn_x(gmfn), pg->shadow_flags);
- if ( all )
- domain_crash(v->domain);
+ domain_crash(v->domain);
}
/* Need to flush TLBs now, so that linear maps are safe next time we
diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
new file mode 100644
index 0000000000..341457b4bc
--- /dev/null
+++ b/xen/arch/x86/pci.c
@@ -0,0 +1,118 @@
+/******************************************************************************
+ * pci.c
+ *
+ * PCI access functions.
+ */
+
+#include <xen/config.h>
+#include <xen/pci.h>
+#include <xen/spinlock.h>
+#include <asm/io.h>
+
+#define PCI_CONF_ADDRESS(bus, dev, func, reg) \
+ (0x80000000 | (bus << 16) | (dev << 11) | (func << 8) | (reg & ~3))
+
+static DEFINE_SPINLOCK(pci_config_lock);
+
+uint32_t pci_conf_read(uint32_t cf8, uint8_t offset, uint8_t bytes)
+{
+ unsigned long flags;
+ uint32_t value;
+
+ BUG_ON((offset + bytes) > 4);
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(cf8, 0xcf8);
+
+ switch ( bytes )
+ {
+ case 1:
+ value = inb(0xcfc + offset);
+ break;
+ case 2:
+ value = inw(0xcfc + offset);
+ break;
+ case 4:
+ value = inl(0xcfc + offset);
+ break;
+ default:
+ value = 0;
+ BUG();
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return value;
+}
+
+void pci_conf_write(uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data)
+{
+ unsigned long flags;
+
+ BUG_ON((offset + bytes) > 4);
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(cf8, 0xcf8);
+
+ switch ( bytes )
+ {
+ case 1:
+ outb((uint8_t)data, 0xcfc + offset);
+ break;
+ case 2:
+ outw((uint16_t)data, 0xcfc + offset);
+ break;
+ case 4:
+ outl(data, 0xcfc + offset);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+uint8_t pci_conf_read8(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ return pci_conf_read(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 3, 1);
+}
+
+uint16_t pci_conf_read16(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ return pci_conf_read(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 2, 2);
+}
+
+uint32_t pci_conf_read32(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ return pci_conf_read(PCI_CONF_ADDRESS(bus, dev, func, reg), 0, 4);
+}
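+
+/*
+ * Example (illustrative only): pci_conf_read16(0, 2, 0, 0x00) returns the
+ * vendor ID of the device at bus 0, slot 2, function 0.
+ */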
+
+void pci_conf_write8(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint8_t data)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 3, 1, data);
+}
+
+void pci_conf_write16(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint16_t data)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 2, 2, data);
+}
+
+void pci_conf_write32(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint32_t data)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), 0, 4, data);
+}
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 383a868225..9b025b51b1 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -861,6 +861,8 @@ void __init __start_xen(unsigned long mbi_p)
early_boot = 0;
+ softirq_init();
+
early_cpu_init();
paging_init();
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 57135940bf..ccefc50cf2 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -40,7 +40,7 @@ string_param("clocksource", opt_clocksource);
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
unsigned long hpet_address;
DEFINE_SPINLOCK(rtc_lock);
-volatile unsigned long jiffies;
+unsigned long pit0_ticks;
static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
static DEFINE_SPINLOCK(wc_lock);
@@ -67,19 +67,16 @@ struct platform_timesource {
static DEFINE_PER_CPU(struct cpu_time, cpu_time);
/*
- * Protected by platform_timer_lock, which must be acquired with interrupts
- * disabled because plt_overflow() is called from PIT ch0 interrupt context.
- */
-static s_time_t stime_platform_stamp;
-static u64 platform_timer_stamp;
-static DEFINE_SPINLOCK(platform_timer_lock);
-
-/*
- * Folding platform timer into 64-bit software counter is a really critical
- * operation! We therefore do it directly in PIT ch0 interrupt handler.
+ * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
+ * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
+ * softirq handling will happen in time.
+ *
+ * The pit_lock protects the 16- and 32-bit stamp fields as well as the
+ * PIT channel 2 counter accesses that update them.
*/
-static u32 plt_overflow_jiffies;
-static void plt_overflow(void);
+static DEFINE_SPINLOCK(pit_lock);
+static u16 pit_stamp16;
+static u32 pit_stamp32;
+static int using_pit;
/*
* 32-bit division of integer dividend and integer divisor yielding
@@ -146,22 +143,36 @@ static inline u64 scale_delta(u64 delta, struct time_scale *scale)
return product;
}
-void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
+static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
ASSERT(local_irq_is_enabled());
- /* Update jiffies counter. */
- (*(volatile unsigned long *)&jiffies)++;
+    /* Only for start-of-day interrupt tests in io_apic.c. */
+ (*(volatile unsigned long *)&pit0_ticks)++;
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
if ( !cpu_has_apic )
raise_softirq(TIMER_SOFTIRQ);
- if ( --plt_overflow_jiffies == 0 )
- plt_overflow();
+ /* Emulate a 32-bit PIT counter. */
+ if ( using_pit )
+ {
+ u16 count;
+
+ spin_lock_irq(&pit_lock);
+
+ outb(0x80, PIT_MODE);
+ count = inb(PIT_CH2);
+ count |= inb(PIT_CH2) << 8;
+
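+        /*
+         * The PIT counts down: (u16)(pit_stamp16 - count) is the number of
+         * ticks elapsed since the last sample, modulo 2^16.
+         */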
+ pit_stamp32 += (u16)(pit_stamp16 - count);
+ pit_stamp16 = count;
+
+ spin_unlock_irq(&pit_lock);
+ }
}
-static struct irqaction irq0 = { timer_interrupt, "timer", NULL};
+static struct irqaction irq0 = { timer_interrupt, "timer", NULL };
/* ------ Calibrate the TSC -------
* Return processor ticks per second / CALIBRATE_FRAC.
@@ -295,12 +306,21 @@ static char *freq_string(u64 freq)
static u32 read_pit_count(void)
{
- u16 count;
- ASSERT(spin_is_locked(&platform_timer_lock));
+ u16 count16;
+ u32 count32;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pit_lock, flags);
+
outb(0x80, PIT_MODE);
- count = inb(PIT_CH2);
- count |= inb(PIT_CH2) << 8;
- return ~count;
+ count16 = inb(PIT_CH2);
+ count16 |= inb(PIT_CH2) << 8;
+
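+    /* Extend by the ticks elapsed since the last interrupt-time sample. */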
+ count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);
+
+ spin_unlock_irqrestore(&pit_lock, flags);
+
+ return count32;
}
static void init_pit(struct platform_timesource *pts)
@@ -308,7 +328,8 @@ static void init_pit(struct platform_timesource *pts)
pts->name = "PIT";
pts->frequency = CLOCK_TICK_RATE;
pts->read_counter = read_pit_count;
- pts->counter_bits = 16;
+ pts->counter_bits = 32;
+ using_pit = 1;
}
/************************************************************
@@ -466,24 +487,28 @@ static int init_pmtimer(struct platform_timesource *pts)
static struct platform_timesource plt_src; /* details of chosen timesource */
static u32 plt_mask; /* hardware-width mask */
-static u32 plt_overflow_period; /* jiffies between calls to plt_overflow() */
+static u64 plt_overflow_period; /* ns between calls to plt_overflow() */
static struct time_scale plt_scale; /* scale: platform counter -> nanosecs */
/* Protected by platform_timer_lock. */
-static u64 plt_count64; /* 64-bit platform counter stamp */
-static u32 plt_count; /* hardware-width platform counter stamp */
+static DEFINE_SPINLOCK(platform_timer_lock);
+static s_time_t stime_platform_stamp; /* System time at below platform time */
+static u64 platform_timer_stamp; /* Platform time at above system time */
+static u64 plt_stamp64; /* 64-bit platform counter stamp */
+static u32 plt_stamp; /* hardware-width platform counter stamp */
+static struct timer plt_overflow_timer;
-static void plt_overflow(void)
+static void plt_overflow(void *unused)
{
u32 count;
- unsigned long flags;
- spin_lock_irqsave(&platform_timer_lock, flags);
+ spin_lock(&platform_timer_lock);
count = plt_src.read_counter();
- plt_count64 += (count - plt_count) & plt_mask;
- plt_count = count;
- plt_overflow_jiffies = plt_overflow_period;
- spin_unlock_irqrestore(&platform_timer_lock, flags);
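+    /* Fold the hardware-width delta into the 64-bit software stamp. */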
+ plt_stamp64 += (count - plt_stamp) & plt_mask;
+ plt_stamp = count;
+ spin_unlock(&platform_timer_lock);
+
+ set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
}
static s_time_t __read_platform_stime(u64 platform_time)
@@ -497,12 +522,11 @@ static s_time_t read_platform_stime(void)
{
u64 count;
s_time_t stime;
- unsigned long flags;
- spin_lock_irqsave(&platform_timer_lock, flags);
- count = plt_count64 + ((plt_src.read_counter() - plt_count) & plt_mask);
+ spin_lock(&platform_timer_lock);
+ count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stime = __read_platform_stime(count);
- spin_unlock_irqrestore(&platform_timer_lock, flags);
+ spin_unlock(&platform_timer_lock);
return stime;
}
@@ -511,27 +535,25 @@ static void platform_time_calibration(void)
{
u64 count;
s_time_t stamp;
- unsigned long flags;
- spin_lock_irqsave(&platform_timer_lock, flags);
- count = plt_count64 + ((plt_src.read_counter() - plt_count) & plt_mask);
+ spin_lock(&platform_timer_lock);
+ count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stamp = __read_platform_stime(count);
stime_platform_stamp = stamp;
platform_timer_stamp = count;
- spin_unlock_irqrestore(&platform_timer_lock, flags);
+ spin_unlock(&platform_timer_lock);
}
static void resume_platform_timer(void)
{
/* No change in platform_stime across suspend/resume. */
- platform_timer_stamp = plt_count64;
- plt_count = plt_src.read_counter();
+ platform_timer_stamp = plt_stamp64;
+ plt_stamp = plt_src.read_counter();
}
static void init_platform_timer(void)
{
struct platform_timesource *pts = &plt_src;
- u64 overflow_period;
int rc = -1;
if ( opt_clocksource[0] != '\0' )
@@ -561,13 +583,12 @@ static void init_platform_timer(void)
set_time_scale(&plt_scale, pts->frequency);
- overflow_period = scale_delta(1ull << (pts->counter_bits-1), &plt_scale);
- do_div(overflow_period, MILLISECS(1000/HZ));
- plt_overflow_period = overflow_period;
- plt_overflow();
- printk("Platform timer overflows in %d jiffies.\n", plt_overflow_period);
+ plt_overflow_period = scale_delta(
+ 1ull << (pts->counter_bits-1), &plt_scale);
+ init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+ plt_overflow(NULL);
- platform_timer_stamp = plt_count64;
+ platform_timer_stamp = plt_stamp64;
printk("Platform timer is %s %s\n",
freq_string(pts->frequency), pts->name);
@@ -969,6 +990,19 @@ void __init early_time_init(void)
setup_irq(0, &irq0);
}
+static int __init disable_pit_irq(void)
+{
+ if ( !using_pit && cpu_has_apic )
+ {
+ /* Disable PIT CH0 timer interrupt. */
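+        /* 0x30 selects channel 0, lobyte/hibyte access, mode 0 (one-shot). */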
+ outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0);
+ outb_p(0, PIT_CH0);
+ }
+ return 0;
+}
+__initcall(disable_pit_irq);
+
void send_timer_event(struct vcpu *v)
{
send_guest_vcpu_virq(v, VIRQ_TIMER);
@@ -1002,6 +1036,8 @@ int time_resume(void)
{
u64 tmp = init_pit_and_calibrate_tsc();
+ disable_pit_irq();
+
set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
resume_platform_timer();
@@ -1019,7 +1055,7 @@ int time_resume(void)
int dom0_pit_access(struct ioreq *ioreq)
{
/* Is Xen using Channel 2? Then disallow direct dom0 access. */
- if ( plt_src.read_counter == read_pit_count )
+ if ( using_pit )
return 0;
switch ( ioreq->addr )
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 019e3e56cf..5e39c9b417 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1305,23 +1305,24 @@ static int read_gate_descriptor(unsigned int gate_sel,
const struct desc_struct *pdesc;
- pdesc = (const struct desc_struct *)(!(gate_sel & 4) ?
- GDT_VIRT_START(v) :
- LDT_VIRT_START(v))
- + (gate_sel >> 3);
- if ( gate_sel < 4 ||
- (gate_sel >= FIRST_RESERVED_GDT_BYTE && !(gate_sel & 4)) ||
+ pdesc = (const struct desc_struct *)
+ (!(gate_sel & 4) ? GDT_VIRT_START(v) : LDT_VIRT_START(v))
+ + (gate_sel >> 3);
+ if ( (gate_sel < 4) ||
+ ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
__get_user(desc, pdesc) )
return 0;
*sel = (desc.a >> 16) & 0x0000fffc;
*off = (desc.a & 0x0000ffff) | (desc.b & 0xffff0000);
*ar = desc.b & 0x0000ffff;
+
/*
* check_descriptor() clears the DPL field and stores the
* guest requested DPL in the selector's RPL field.
*/
- ASSERT(!(*ar & _SEGMENT_DPL));
+ if ( *ar & _SEGMENT_DPL )
+ return 0;
*ar |= (desc.a >> (16 - 13)) & _SEGMENT_DPL;
if ( !is_pv_32bit_vcpu(v) )
@@ -1352,7 +1353,7 @@ static int read_gate_descriptor(unsigned int gate_sel,
#endif
/* Has the guest requested sufficient permission for this I/O access? */
-static inline int guest_io_okay(
+static int guest_io_okay(
unsigned int port, unsigned int bytes,
struct vcpu *v, struct cpu_user_regs *regs)
{
@@ -1394,19 +1395,130 @@ static inline int guest_io_okay(
}
/* Has the administrator granted sufficient permission for this I/O access? */
-static inline int admin_io_okay(
+static int admin_io_okay(
unsigned int port, unsigned int bytes,
struct vcpu *v, struct cpu_user_regs *regs)
{
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
-#define guest_inb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
-#define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
-#define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
-#define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
-#define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
-#define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
+static uint32_t guest_io_read(
+ unsigned int port, unsigned int bytes,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ extern uint32_t pci_conf_read(
+ uint32_t cf8, uint8_t offset, uint8_t bytes);
+
+ uint32_t data = 0;
+ unsigned int shift = 0;
+
+ if ( admin_io_okay(port, bytes, v, regs) )
+ {
+ switch ( bytes )
+ {
+ case 1: return inb(port);
+ case 2: return inw(port);
+ case 4: return inl(port);
+ }
+ }
+
+ while ( bytes != 0 )
+ {
+ unsigned int size = 1;
+ uint32_t sub_data = 0xff;
+
+ if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
+ {
+ sub_data = pv_pit_handler(port, 0, 0);
+ }
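+        /* 0xcf8-0xcfb: return bytes of the latched CONFIG_ADDRESS value. */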
+ else if ( (port & 0xfffc) == 0xcf8 )
+ {
+ size = min(bytes, 4 - (port & 3));
+ sub_data = v->domain->arch.pci_cf8 >> ((port & 3) * 8);
+ }
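+        /* 0xcfc-0xcff: forward CONFIG_DATA reads for the privileged domain. */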
+ else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
+ {
+ size = min(bytes, 4 - (port & 3));
+ if ( size == 3 )
+ size = 2;
+ sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
+ }
+
+ if ( size == 4 )
+ return sub_data;
+
+ data |= (sub_data & ((1u << (size * 8)) - 1)) << shift;
+ shift += size * 8;
+ port += size;
+ bytes -= size;
+ }
+
+ return data;
+}
+
+static void guest_io_write(
+ unsigned int port, unsigned int bytes, uint32_t data,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ extern void pci_conf_write(
+ uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data);
+
+ if ( admin_io_okay(port, bytes, v, regs) )
+ {
+ switch ( bytes ) {
+ case 1:
+ outb((uint8_t)data, port);
+ if ( pv_post_outb_hook )
+ pv_post_outb_hook(port, (uint8_t)data);
+ break;
+ case 2:
+ outw((uint16_t)data, port);
+ break;
+ case 4:
+ outl(data, port);
+ break;
+ }
+ return;
+ }
+
+ while ( bytes != 0 )
+ {
+ unsigned int size = 1;
+
+ if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
+ {
+ pv_pit_handler(port, (uint8_t)data, 1);
+ }
+ else if ( (port & 0xfffc) == 0xcf8 )
+ {
+ size = min(bytes, 4 - (port & 3));
+ if ( size == 4 )
+ {
+ v->domain->arch.pci_cf8 = data;
+ }
+ else
+ {
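+                /* Merge a sub-dword write into the latched CONFIG_ADDRESS. */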
+ uint32_t mask = ((1u << (size * 8)) - 1) << ((port & 3) * 8);
+ v->domain->arch.pci_cf8 &= ~mask;
+ v->domain->arch.pci_cf8 |= (data << ((port & 3) * 8)) & mask;
+ }
+ }
+ else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
+ {
+ size = min(bytes, 4 - (port & 3));
+ if ( size == 3 )
+ size = 2;
+ pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
+ }
+
+ if ( size == 4 )
+ return;
+
+ port += size;
+ bytes -= size;
+ data >>= size * 8;
+ }
+}
/* I/O emulation support. Helper routines for, and type of, the stack stub.*/
void host_to_guest_gpr_switch(struct cpu_user_regs *)
@@ -1525,7 +1637,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
/* REX prefix. */
if ( rex & 8 ) /* REX.W */
- op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */
+ op_bytes = 4; /* emulate only opcodes not supporting 64-bit operands */
modrm_reg = (rex & 4) << 1; /* REX.R */
/* REX.X does not need to be decoded. */
modrm_rm = (rex & 1) << 3; /* REX.B */
@@ -1554,7 +1666,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
{
if ( !read_descriptor(data_sel, v, regs,
&data_base, &data_limit, &ar,
- _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
+ _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|
+ _SEGMENT_P) )
goto fail;
if ( !(ar & _SEGMENT_S) ||
!(ar & _SEGMENT_P) ||
@@ -1601,69 +1714,39 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case 0x6c: /* INSB */
op_bytes = 1;
case 0x6d: /* INSW/INSL */
- if ( data_limit < op_bytes - 1 ||
- rd_ad(edi) > data_limit - (op_bytes - 1) ||
+ if ( (data_limit < (op_bytes - 1)) ||
+ (rd_ad(edi) > (data_limit - (op_bytes - 1))) ||
!guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- switch ( op_bytes )
- {
- case 1:
- /* emulate PIT counter 2 */
- data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) :
- ((port == 0x42 || port == 0x43 || port == 0x61) ?
- pv_pit_handler(port, 0, 0) : ~0));
- break;
- case 2:
- data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0);
- break;
- case 4:
- data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0);
- break;
- }
- if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 )
+ data = guest_io_read(port, op_bytes, v, regs);
+ if ( (rc = copy_to_user((void *)data_base + rd_ad(edi),
+ &data, op_bytes)) != 0 )
{
propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc,
PFEC_write_access);
return EXCRET_fault_fixed;
}
- wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
+ wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF)
+ ? -op_bytes : op_bytes));
break;
case 0x6e: /* OUTSB */
op_bytes = 1;
case 0x6f: /* OUTSW/OUTSL */
- if ( data_limit < op_bytes - 1 ||
- rd_ad(esi) > data_limit - (op_bytes - 1) ||
- !guest_io_okay(port, op_bytes, v, regs) )
+ if ( (data_limit < (op_bytes - 1)) ||
+ (rd_ad(esi) > (data_limit - (op_bytes - 1))) ||
+ !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes);
- if ( rc != 0 )
+ if ( (rc = copy_from_user(&data, (void *)data_base + rd_ad(esi),
+ op_bytes)) != 0 )
{
- propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0);
+ propagate_page_fault(data_base + rd_ad(esi)
+ + op_bytes - rc, 0);
return EXCRET_fault_fixed;
}
- switch ( op_bytes )
- {
- case 1:
- if ( guest_outb_okay(port, v, regs) )
- {
- outb((u8)data, port);
- if ( pv_post_outb_hook )
- pv_post_outb_hook(port, data);
- }
- else if ( port == 0x42 || port == 0x43 || port == 0x61 )
- pv_pit_handler(port, data, 1);
- break;
- case 2:
- if ( guest_outw_okay(port, v, regs) )
- outw((u16)data, port);
- break;
- case 4:
- if ( guest_outl_okay(port, v, regs) )
- outl((u32)data, port);
- break;
- }
- wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
+ guest_io_write(port, op_bytes, data, v, regs);
+ wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF)
+ ? -op_bytes : op_bytes));
break;
}
@@ -1727,31 +1810,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
exec_in:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- switch ( op_bytes )
+ if ( admin_io_okay(port, op_bytes, v, regs) )
{
- case 1:
- if ( guest_inb_okay(port, v, regs) )
- io_emul(regs);
- else if ( port == 0x42 || port == 0x43 || port == 0x61 )
- {
- regs->eax &= ~0xffUL;
- regs->eax |= pv_pit_handler(port, 0, 0);
- }
- else
- regs->eax |= (u8)~0;
- break;
- case 2:
- if ( guest_inw_okay(port, v, regs) )
- io_emul(regs);
- else
- regs->eax |= (u16)~0;
- break;
- case 4:
- if ( guest_inl_okay(port, v, regs) )
- io_emul(regs);
+ io_emul(regs);
+ }
+ else
+ {
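+        /*
+         * A 4-byte IN replaces all of %eax; narrower accesses merge the
+         * result into the low bytes only.
+         */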
+ if ( op_bytes == 4 )
+ regs->eax = 0;
else
- regs->eax = (u32)~0;
- break;
+ regs->eax &= ~((1u << (op_bytes * 8)) - 1);
+ regs->eax |= guest_io_read(port, op_bytes, v, regs);
}
bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
goto done;
@@ -1770,26 +1839,15 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
exec_out:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- switch ( op_bytes )
+ if ( admin_io_okay(port, op_bytes, v, regs) )
{
- case 1:
- if ( guest_outb_okay(port, v, regs) )
- {
- io_emul(regs);
- if ( pv_post_outb_hook )
- pv_post_outb_hook(port, regs->eax);
- }
- else if ( port == 0x42 || port == 0x43 || port == 0x61 )
- pv_pit_handler(port, regs->eax, 1);
- break;
- case 2:
- if ( guest_outw_okay(port, v, regs) )
- io_emul(regs);
- break;
- case 4:
- if ( guest_outl_okay(port, v, regs) )
- io_emul(regs);
- break;
+ io_emul(regs);
+ if ( (op_bytes == 1) && pv_post_outb_hook )
+ pv_post_outb_hook(port, regs->eax);
+ }
+ else
+ {
+ guest_io_write(port, op_bytes, regs->eax, v, regs);
}
bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
goto done;
@@ -1921,14 +1979,14 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
break;
case 3: /* Write CR3 */
- LOCK_BIGLOCK(v->domain);
+ domain_lock(v->domain);
if ( !is_pv_32on64_vcpu(v) )
rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
#ifdef CONFIG_COMPAT
else
rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));
#endif
- UNLOCK_BIGLOCK(v->domain);
+ domain_unlock(v->domain);
if ( rc == 0 ) /* not okay */
goto fail;
break;
@@ -2137,8 +2195,8 @@ static void emulate_gate_op(struct cpu_user_regs *regs)
/* Check whether this fault is due to the use of a call gate. */
if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
- ((ar >> 13) & 3) < (regs->cs & 3) ||
- (ar & _SEGMENT_TYPE) != 0xc00 )
+ (((ar >> 13) & 3) < (regs->cs & 3)) ||
+ ((ar & _SEGMENT_TYPE) != 0xc00) )
{
do_guest_trap(TRAP_gp_fault, regs, 1);
return;
@@ -2232,15 +2290,18 @@ static void emulate_gate_op(struct cpu_user_regs *regs)
{
if ( (modrm & 7) == 4 )
{
- unsigned int sib = insn_fetch(u8, base, eip, limit);
+ unsigned int sib;
+ sib = insn_fetch(u8, base, eip, limit);
modrm = (modrm & ~7) | (sib & 7);
if ( (sib >>= 3) != 4 )
- opnd_off = *(unsigned long *)decode_register(sib & 7, regs, 0);
+ opnd_off = *(unsigned long *)
+ decode_register(sib & 7, regs, 0);
opnd_off <<= sib >> 3;
}
if ( (modrm & 7) != 5 || (modrm & 0xc0) )
- opnd_off += *(unsigned long *)decode_register(modrm & 7, regs, 0);
+ opnd_off += *(unsigned long *)
+ decode_register(modrm & 7, regs, 0);
else
modrm |= 0x87;
if ( !opnd_sel )
@@ -2576,12 +2637,14 @@ asmlinkage void do_general_protection(struct cpu_user_regs *regs)
panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
}
-static void nmi_softirq(void)
+static void nmi_action(unsigned long unused)
{
/* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
vcpu_kick(dom0->vcpu[0]);
}
+static DECLARE_TASKLET(nmi_tasklet, nmi_action, 0);
+
static void nmi_dom0_report(unsigned int reason_idx)
{
struct domain *d;
@@ -2593,7 +2656,7 @@ static void nmi_dom0_report(unsigned int reason_idx)
set_bit(reason_idx, nmi_reason(d));
if ( !test_and_set_bool(v->nmi_pending) )
- raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
+ tasklet_schedule(&nmi_tasklet); /* not safe to wake a vcpu here */
}
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -2871,8 +2934,6 @@ void __init trap_init(void)
percpu_traps_init();
cpu_init();
-
- open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
long register_guest_nmi_callback(unsigned long address)
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index 256f7a5ac8..a1de1bab27 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -28,12 +28,12 @@ int compat_set_gdt(XEN_GUEST_HANDLE(uint) frame_list, unsigned int entries)
guest_handle_add_offset(frame_list, 1);
}
- LOCK_BIGLOCK(current->domain);
+ domain_lock(current->domain);
if ( (ret = set_gdt(current, frames, entries)) == 0 )
flush_tlb_local();
- UNLOCK_BIGLOCK(current->domain);
+ domain_unlock(current->domain);
return ret;
}
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index f9f33e0a88..3d79657989 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -59,7 +59,7 @@ void *alloc_xen_pagetable(void)
if ( !early_boot )
{
- struct page_info *pg = alloc_domheap_page(NULL);
+ struct page_info *pg = alloc_domheap_page(NULL, 0);
BUG_ON(pg == NULL);
return page_to_virt(pg);
}
@@ -108,7 +108,7 @@ void __init paging_init(void)
struct page_info *l1_pg, *l2_pg, *l3_pg;
/* Create user-accessible L2 directory to map the MPT for guests. */
- if ( (l3_pg = alloc_domheap_page(NULL)) == NULL )
+ if ( (l3_pg = alloc_domheap_page(NULL, 0)) == NULL )
goto nomem;
l3_ro_mpt = page_to_virt(l3_pg);
clear_page(l3_ro_mpt);
@@ -134,7 +134,7 @@ void __init paging_init(void)
1UL << L2_PAGETABLE_SHIFT);
if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
{
- if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
+ if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
goto nomem;
va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
l2_ro_mpt = page_to_virt(l2_pg);
@@ -154,7 +154,7 @@ void __init paging_init(void)
l4_table_offset(HIRO_COMPAT_MPT_VIRT_START));
l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(
HIRO_COMPAT_MPT_VIRT_START)]);
- if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
+ if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
goto nomem;
compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
clear_page(l2_ro_mpt);