Diffstat (limited to 'xen')
-rw-r--r--  xen/arch/ia64/linux-xen/sn/kernel/irq.c | 2
-rw-r--r--  xen/arch/ia64/vmx/vmx_hypercall.c | 5
-rw-r--r--  xen/arch/ia64/xen/mm.c | 10
-rw-r--r--  xen/arch/ia64/xen/tlb_track.c | 4
-rw-r--r--  xen/arch/ia64/xen/xensetup.c | 2
-rw-r--r--  xen/arch/x86/Makefile | 1
-rw-r--r--  xen/arch/x86/acpi/boot.c | 62
-rw-r--r--  xen/arch/x86/cpu/amd.c | 18
-rw-r--r--  xen/arch/x86/domain.c | 25
-rw-r--r--  xen/arch/x86/domain_build.c | 4
-rw-r--r--  xen/arch/x86/hvm/emulate.c | 81
-rw-r--r--  xen/arch/x86/hvm/hpet.c | 21
-rw-r--r--  xen/arch/x86/hvm/hvm.c | 192
-rw-r--r--  xen/arch/x86/hvm/i8254.c | 68
-rw-r--r--  xen/arch/x86/hvm/intercept.c | 161
-rw-r--r--  xen/arch/x86/hvm/io.c | 84
-rw-r--r--  xen/arch/x86/hvm/mtrr.c | 2
-rw-r--r--  xen/arch/x86/hvm/pmtimer.c | 8
-rw-r--r--  xen/arch/x86/hvm/rtc.c | 8
-rw-r--r--  xen/arch/x86/hvm/stdvga.c | 26
-rw-r--r--  xen/arch/x86/hvm/svm/svm.c | 7
-rw-r--r--  xen/arch/x86/hvm/vioapic.c | 54
-rw-r--r--  xen/arch/x86/hvm/vlapic.c | 303
-rw-r--r--  xen/arch/x86/hvm/vmx/realmode.c | 2
-rw-r--r--  xen/arch/x86/hvm/vmx/vmcs.c | 85
-rw-r--r--  xen/arch/x86/hvm/vmx/vmx.c | 295
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_32/exits.S | 2
-rw-r--r--  xen/arch/x86/hvm/vmx/x86_64/exits.S | 2
-rw-r--r--  xen/arch/x86/hvm/vpic.c | 40
-rw-r--r--  xen/arch/x86/io_apic.c | 9
-rw-r--r--  xen/arch/x86/mm.c | 51
-rw-r--r--  xen/arch/x86/mm/hap/Makefile | 1
-rw-r--r--  xen/arch/x86/mm/hap/hap.c | 12
-rw-r--r--  xen/arch/x86/mm/hap/p2m-ept.c | 257
-rw-r--r--  xen/arch/x86/mm/p2m.c | 357
-rw-r--r--  xen/arch/x86/mm/paging.c | 15
-rw-r--r--  xen/arch/x86/mm/shadow/common.c | 8
-rw-r--r--  xen/arch/x86/pci.c | 118
-rw-r--r--  xen/arch/x86/setup.c | 2
-rw-r--r--  xen/arch/x86/time.c | 142
-rw-r--r--  xen/arch/x86/traps.c | 295
-rw-r--r--  xen/arch/x86/x86_64/compat/mm.c | 4
-rw-r--r--  xen/arch/x86/x86_64/mm.c | 8
-rw-r--r--  xen/common/compat/domain.c | 4
-rw-r--r--  xen/common/compat/grant_table.c | 20
-rw-r--r--  xen/common/domain.c | 10
-rw-r--r--  xen/common/domctl.c | 7
-rw-r--r--  xen/common/event_channel.c | 12
-rw-r--r--  xen/common/grant_table.c | 6
-rw-r--r--  xen/common/keyhandler.c | 8
-rw-r--r--  xen/common/memory.c | 25
-rw-r--r--  xen/common/page_alloc.c | 27
-rw-r--r--  xen/common/softirq.c | 102
-rw-r--r--  xen/common/trace.c | 9
-rw-r--r--  xen/common/xencomm.c | 1
-rw-r--r--  xen/drivers/passthrough/amd/iommu_detect.c | 37
-rw-r--r--  xen/drivers/passthrough/amd/iommu_init.c | 2
-rw-r--r--  xen/drivers/passthrough/amd/iommu_map.c | 74
-rw-r--r--  xen/drivers/passthrough/amd/pci_amd_iommu.c | 11
-rw-r--r--  xen/drivers/passthrough/io.c | 72
-rw-r--r--  xen/drivers/passthrough/iommu.c | 2
-rw-r--r--  xen/drivers/passthrough/pci-direct.h | 48
-rw-r--r--  xen/drivers/passthrough/vtd/Makefile | 2
-rw-r--r--  xen/drivers/passthrough/vtd/dmar.c | 26
-rw-r--r--  xen/drivers/passthrough/vtd/dmar.h | 8
-rw-r--r--  xen/drivers/passthrough/vtd/intremap.c | 58
-rw-r--r--  xen/drivers/passthrough/vtd/iommu.c | 476
-rw-r--r--  xen/drivers/passthrough/vtd/iommu.h | 4
-rw-r--r--  xen/drivers/passthrough/vtd/qinval.c | 89
-rw-r--r--  xen/drivers/passthrough/vtd/utils.c | 69
-rw-r--r--  xen/drivers/passthrough/vtd/vtd.h | 9
-rw-r--r--  xen/drivers/passthrough/vtd/x86/Makefile | 1
-rw-r--r--  xen/drivers/passthrough/vtd/x86/vtd.c | 303
-rw-r--r--  xen/include/asm-ia64/domain.h | 2
-rw-r--r--  xen/include/asm-ia64/linux-xen/asm/sn/README.origin | 1
-rw-r--r--  xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h | 2
-rw-r--r--  xen/include/asm-ia64/linux-xen/asm/sn/pcidev.h (renamed from xen/include/asm-ia64/linux/asm/sn/pcidev.h) | 4
-rw-r--r--  xen/include/asm-ia64/linux-xen/linux/interrupt.h | 2
-rw-r--r--  xen/include/asm-ia64/linux-xen/linux/linux-pci.h (renamed from xen/include/asm-ia64/linux-xen/linux/pci.h) | 0
-rw-r--r--  xen/include/asm-ia64/linux/asm/sn/README.origin | 1
-rw-r--r--  xen/include/asm-x86/domain.h | 26
-rw-r--r--  xen/include/asm-x86/hvm/domain.h | 12
-rw-r--r--  xen/include/asm-x86/hvm/hvm.h | 4
-rw-r--r--  xen/include/asm-x86/hvm/io.h | 21
-rw-r--r--  xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 1
-rw-r--r--  xen/include/asm-x86/hvm/svm/vmcb.h | 6
-rw-r--r--  xen/include/asm-x86/hvm/vcpu.h | 6
-rw-r--r--  xen/include/asm-x86/hvm/vlapic.h | 2
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmcs.h | 47
-rw-r--r--  xen/include/asm-x86/hvm/vmx/vmx.h | 126
-rw-r--r--  xen/include/asm-x86/mm.h | 3
-rw-r--r--  xen/include/asm-x86/numa.h | 1
-rw-r--r--  xen/include/asm-x86/p2m.h | 80
-rw-r--r--  xen/include/asm-x86/paging.h | 2
-rw-r--r--  xen/include/public/hvm/params.h | 8
-rw-r--r--  xen/include/xen/hvm/iommu.h | 5
-rw-r--r--  xen/include/xen/hypercall.h | 1
-rw-r--r--  xen/include/xen/iommu.h | 6
-rw-r--r--  xen/include/xen/mm.h | 7
-rw-r--r--  xen/include/xen/numa.h | 7
-rw-r--r--  xen/include/xen/pci.h | 29
-rw-r--r--  xen/include/xen/sched.h | 9
-rw-r--r--  xen/include/xen/softirq.h | 59
-rw-r--r--  xen/include/xen/xencomm.h | 8
-rw-r--r--  xen/xsm/acm/acm_chinesewall_hooks.c | 8
-rw-r--r--  xen/xsm/acm/acm_simple_type_enforcement_hooks.c | 12
106 files changed, 3099 insertions(+), 1784 deletions(-)
diff --git a/xen/arch/ia64/linux-xen/sn/kernel/irq.c b/xen/arch/ia64/linux-xen/sn/kernel/irq.c
index 3feeccbd94..acdc996b4e 100644
--- a/xen/arch/ia64/linux-xen/sn/kernel/irq.c
+++ b/xen/arch/ia64/linux-xen/sn/kernel/irq.c
@@ -12,7 +12,7 @@
#include <linux/spinlock.h>
#include <linux/init.h>
#ifdef XEN
-#include <linux/pci.h>
+#include <linux/linux-pci.h>
#include <asm/hw_irq.h>
#endif
#include <asm/sn/addrs.h>
diff --git a/xen/arch/ia64/vmx/vmx_hypercall.c b/xen/arch/ia64/vmx/vmx_hypercall.c
index 97c8d5f990..84510d7b78 100644
--- a/xen/arch/ia64/vmx/vmx_hypercall.c
+++ b/xen/arch/ia64/vmx/vmx_hypercall.c
@@ -165,6 +165,11 @@ do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
iorp = &d->arch.hvm_domain.buf_pioreq;
rc = vmx_set_ioreq_page(d, iorp, a.value);
break;
+ case HVM_PARAM_DM_DOMAIN:
+ if (a.value == DOMID_SELF)
+ a.value = current->domain->domain_id;
+ rc = a.value ? -EINVAL : 0; /* no stub domain support */
+ break;
default:
/* nothing */
break;
diff --git a/xen/arch/ia64/xen/mm.c b/xen/arch/ia64/xen/mm.c
index d51912fc21..5c8c6dd8de 100644
--- a/xen/arch/ia64/xen/mm.c
+++ b/xen/arch/ia64/xen/mm.c
@@ -820,7 +820,7 @@ __assign_new_domain_page(struct domain *d, unsigned long mpaddr,
BUG_ON(!pte_none(*pte));
- p = alloc_domheap_page(d);
+ p = alloc_domheap_page(d, 0);
if (unlikely(!p)) {
printk("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
return(p);
@@ -2315,7 +2315,7 @@ steal_page(struct domain *d, struct page_info *page, unsigned int memflags)
unsigned long new_mfn;
int ret;
- new = alloc_domheap_page(d);
+ new = alloc_domheap_page(d, 0);
if (new == NULL) {
gdprintk(XENLOG_INFO, "alloc_domheap_page() failed\n");
return -1;
@@ -2602,7 +2602,7 @@ void *pgtable_quicklist_alloc(void)
BUG_ON(dom_p2m == NULL);
if (!opt_p2m_xenheap) {
- struct page_info *page = alloc_domheap_page(dom_p2m);
+ struct page_info *page = alloc_domheap_page(dom_p2m, 0);
if (page == NULL)
return NULL;
p = page_to_virt(page);
@@ -2827,7 +2827,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
return -EINVAL;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
/* Check remapping necessity */
prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
@@ -2853,7 +2853,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
guest_physmap_add_page(d, xatp.gpfn, mfn);
out:
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
rcu_unlock_domain(d);
diff --git a/xen/arch/ia64/xen/tlb_track.c b/xen/arch/ia64/xen/tlb_track.c
index 50ef084818..d9e30a2db5 100644
--- a/xen/arch/ia64/xen/tlb_track.c
+++ b/xen/arch/ia64/xen/tlb_track.c
@@ -48,7 +48,7 @@ tlb_track_allocate_entries(struct tlb_track* tlb_track)
__func__, tlb_track->num_entries, tlb_track->limit);
return -ENOMEM;
}
- entry_page = alloc_domheap_page(NULL);
+ entry_page = alloc_domheap_page(NULL, 0);
if (entry_page == NULL) {
dprintk(XENLOG_WARNING,
"%s: domheap page failed. num_entries %d limit %d\n",
@@ -84,7 +84,7 @@ tlb_track_create(struct domain* d)
if (tlb_track == NULL)
goto out;
- hash_page = alloc_domheap_page(NULL);
+ hash_page = alloc_domheap_page(NULL, 0);
if (hash_page == NULL)
goto out;
diff --git a/xen/arch/ia64/xen/xensetup.c b/xen/arch/ia64/xen/xensetup.c
index c3aa6bfb46..d6aa79c246 100644
--- a/xen/arch/ia64/xen/xensetup.c
+++ b/xen/arch/ia64/xen/xensetup.c
@@ -576,6 +576,8 @@ skip_move:
end_boot_allocator();
+ softirq_init();
+
late_setup_arch(&cmdline);
scheduler_init();
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 086a7b530f..334a996eb6 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -31,6 +31,7 @@ obj-y += mm.o
obj-y += mpparse.o
obj-y += nmi.o
obj-y += numa.o
+obj-y += pci.o
obj-y += physdev.o
obj-y += rwlock.o
obj-y += setup.o
diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index cfe87671e9..9a17d61e3b 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -374,6 +374,18 @@ extern u32 pmtmr_ioport;
#endif
#ifdef CONFIG_ACPI_SLEEP
+#define acpi_fadt_copy_address(dst, src, len) do { \
+ if (fadt->header.revision >= FADT2_REVISION_ID) \
+ acpi_sinfo.dst##_blk = fadt->x##src##_block; \
+ if (!acpi_sinfo.dst##_blk.address) { \
+ acpi_sinfo.dst##_blk.address = fadt->src##_block; \
+ acpi_sinfo.dst##_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO; \
+ acpi_sinfo.dst##_blk.bit_width = fadt->len##_length << 3; \
+ acpi_sinfo.dst##_blk.bit_offset = 0; \
+ acpi_sinfo.dst##_blk.access_width = 0; \
+ } \
+} while (0)
+
/* Get pm1x_cnt and pm1x_evt information for ACPI sleep */
static void __init
acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
@@ -388,37 +400,18 @@ acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
goto bad;
rsdp = __va(rsdp_phys);
- if (fadt->header.revision >= FADT2_REVISION_ID) {
- memcpy(&acpi_sinfo.pm1a_cnt_blk, &fadt->xpm1a_control_block,
- sizeof(struct acpi_generic_address));
- memcpy(&acpi_sinfo.pm1b_cnt_blk, &fadt->xpm1b_control_block,
- sizeof(struct acpi_generic_address));
- memcpy(&acpi_sinfo.pm1a_evt_blk, &fadt->xpm1a_event_block,
- sizeof(struct acpi_generic_address));
- memcpy(&acpi_sinfo.pm1b_evt_blk, &fadt->xpm1b_event_block,
- sizeof(struct acpi_generic_address));
- } else {
- acpi_sinfo.pm1a_cnt_blk.address = fadt->pm1a_control_block;
- acpi_sinfo.pm1b_cnt_blk.address = fadt->pm1b_control_block;
- acpi_sinfo.pm1a_evt_blk.address = fadt->pm1a_event_block;
- acpi_sinfo.pm1b_evt_blk.address = fadt->pm1b_event_block;
- acpi_sinfo.pm1a_cnt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1b_cnt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1a_evt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1b_evt_blk.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
- acpi_sinfo.pm1a_cnt_blk.bit_width = 16;
- acpi_sinfo.pm1b_cnt_blk.bit_width = 16;
- acpi_sinfo.pm1a_evt_blk.bit_width = 16;
- acpi_sinfo.pm1b_evt_blk.bit_width = 16;
- acpi_sinfo.pm1a_cnt_blk.bit_offset = 0;
- acpi_sinfo.pm1b_cnt_blk.bit_offset = 0;
- acpi_sinfo.pm1a_evt_blk.bit_offset = 0;
- acpi_sinfo.pm1b_evt_blk.bit_offset = 0;
- acpi_sinfo.pm1a_cnt_blk.access_width = 0;
- acpi_sinfo.pm1b_cnt_blk.access_width = 0;
- acpi_sinfo.pm1a_evt_blk.access_width = 0;
- acpi_sinfo.pm1b_evt_blk.access_width = 0;
- }
+ acpi_fadt_copy_address(pm1a_cnt, pm1a_control, pm1_control);
+ acpi_fadt_copy_address(pm1b_cnt, pm1b_control, pm1_control);
+ acpi_fadt_copy_address(pm1a_evt, pm1a_event, pm1_event);
+ acpi_fadt_copy_address(pm1b_evt, pm1b_event, pm1_event);
+
+ printk(KERN_INFO PREFIX
+ "ACPI SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], "
+ "pm1x_evt[%"PRIx64",%"PRIx64"]\n",
+ acpi_sinfo.pm1a_cnt_blk.address,
+ acpi_sinfo.pm1b_cnt_blk.address,
+ acpi_sinfo.pm1a_evt_blk.address,
+ acpi_sinfo.pm1b_evt_blk.address);
/* Now FACS... */
if (fadt->header.revision >= FADT2_REVISION_ID)
@@ -461,13 +454,6 @@ acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
}
printk(KERN_INFO PREFIX
- "ACPI SLEEP INFO: pm1x_cnt[%"PRIx64",%"PRIx64"], "
- "pm1x_evt[%"PRIx64",%"PRIx64"]\n",
- acpi_sinfo.pm1a_cnt_blk.address,
- acpi_sinfo.pm1b_cnt_blk.address,
- acpi_sinfo.pm1a_evt_blk.address,
- acpi_sinfo.pm1b_evt_blk.address);
- printk(KERN_INFO PREFIX
" wakeup_vec[%"PRIx64"], vec_size[%x]\n",
acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width);
return;
diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
index 909a73f3fa..f0253152bc 100644
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -3,6 +3,7 @@
#include <xen/bitops.h>
#include <xen/mm.h>
#include <xen/smp.h>
+#include <xen/pci.h>
#include <asm/io.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -66,19 +67,6 @@ static int c1_ramping_may_cause_clock_drift(struct cpuinfo_x86 *c)
return 1;
}
-/* PCI access functions. Should be safe to use 0xcf8/0xcfc port accesses here. */
-static u8 pci_read_byte(u32 bus, u32 dev, u32 fn, u32 reg)
-{
- outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8);
- return inb(0xcfc + (reg & 3));
-}
-
-static void pci_write_byte(u32 bus, u32 dev, u32 fn, u32 reg, u8 val)
-{
- outl((1U<<31) | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3), 0xcf8);
- outb(val, 0xcfc + (reg & 3));
-}
-
/*
* Disable C1-Clock ramping if enabled in PMM7.CpuLowPwrEnh on 8th-generation
* cores only. Assume BIOS has setup all Northbridges equivalently.
@@ -90,12 +78,12 @@ static void disable_c1_ramping(void)
for (node=0; node < NR_CPUS; node++) {
/* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
- pmm7 = pci_read_byte(0, 0x18+node, 0x3, 0x87);
+ pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87);
/* Invalid read means we've updated every Northbridge. */
if (pmm7 == 0xFF)
break;
pmm7 &= 0xFC; /* clear pmm7[1:0] */
- pci_write_byte(0, 0x18+node, 0x3, 0x87, pmm7);
+ pci_conf_write8(0, 0x18+node, 0x3, 0x87, pmm7);
printk ("AMD: Disabling C1 Clock Ramping Node #%x\n", node);
}
}
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index c56db37b37..4418c51ff9 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -46,6 +46,7 @@
#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/nmi.h>
+#include <xen/numa.h>
#include <xen/iommu.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
@@ -171,7 +172,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
if ( !d->arch.mm_arg_xlat_l3 )
{
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
@@ -189,7 +190,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
if ( !l3e_get_intpte(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]) )
{
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
clear_page(page_to_virt(pg));
@@ -198,7 +199,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
l2tab = l3e_to_l2e(d->arch.mm_arg_xlat_l3[l3_table_offset(va)]);
if ( !l2e_get_intpte(l2tab[l2_table_offset(va)]) )
{
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
clear_page(page_to_virt(pg));
@@ -206,7 +207,7 @@ int setup_arg_xlat_area(struct vcpu *v, l4_pgentry_t *l4tab)
}
l1tab = l2e_to_l1e(l2tab[l2_table_offset(va)]);
BUG_ON(l1e_get_intpte(l1tab[l1_table_offset(va)]));
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, 0);
if ( !pg )
return -ENOMEM;
l1tab[l1_table_offset(va)] = l1e_from_page(pg, PAGE_HYPERVISOR);
@@ -252,7 +253,7 @@ static void release_arg_xlat_area(struct domain *d)
static int setup_compat_l4(struct vcpu *v)
{
- struct page_info *pg = alloc_domheap_page(NULL);
+ struct page_info *pg = alloc_domheap_page(NULL, 0);
l4_pgentry_t *l4tab;
int rc;
@@ -477,7 +478,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
#else /* __x86_64__ */
- if ( (pg = alloc_domheap_page(NULL)) == NULL )
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( pg == NULL )
goto fail;
d->arch.mm_perdomain_l2 = page_to_virt(pg);
clear_page(d->arch.mm_perdomain_l2);
@@ -486,7 +488,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
__PAGE_HYPERVISOR);
- if ( (pg = alloc_domheap_page(NULL)) == NULL )
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( pg == NULL )
goto fail;
d->arch.mm_perdomain_l3 = page_to_virt(pg);
clear_page(d->arch.mm_perdomain_l3);
@@ -500,13 +503,15 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START;
#endif
- paging_domain_init(d);
+ if ( (rc = paging_domain_init(d)) != 0 )
+ goto fail;
paging_initialised = 1;
if ( !is_idle_domain(d) )
{
d->arch.ioport_caps =
rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex);
+ rc = -ENOMEM;
if ( d->arch.ioport_caps == NULL )
goto fail;
@@ -946,9 +951,9 @@ arch_do_vcpu_op(
if ( copy_from_guest(&info, arg, 1) )
break;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
rc = map_vcpu_info(v, info.mfn, info.offset);
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
break;
}
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index dc8ee52f07..56106bae2f 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -630,7 +630,7 @@ int __init construct_dom0(
}
else
{
- page = alloc_domheap_page(NULL);
+ page = alloc_domheap_page(NULL, 0);
if ( !page )
panic("Not enough RAM for domain 0 PML4.\n");
l4start = l4tab = page_to_virt(page);
@@ -957,6 +957,8 @@ int __init construct_dom0(
rc |= ioports_deny_access(dom0, 0x40, 0x43);
/* PIT Channel 2 / PC Speaker Control. */
rc |= ioports_deny_access(dom0, 0x61, 0x61);
+ /* PCI configuration spaces. */
+ rc |= ioports_deny_access(dom0, 0xcf8, 0xcff);
/* Command-line I/O ranges. */
process_dom0_ioports_disable();
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 57065f7625..d7bf9f3f2f 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -20,12 +20,13 @@
#include <asm/hvm/support.h>
static int hvmemul_do_io(
- int is_mmio, paddr_t addr, unsigned long count, int size,
+ int is_mmio, paddr_t addr, unsigned long *reps, int size,
paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
{
struct vcpu *curr = current;
vcpu_iodata_t *vio = get_ioreq(curr);
ioreq_t *p = &vio->vp_ioreq;
+ int rc;
switch ( curr->arch.hvm_vcpu.io_state )
{
@@ -41,52 +42,72 @@ static int hvmemul_do_io(
return X86EMUL_UNHANDLEABLE;
}
- curr->arch.hvm_vcpu.io_state =
- (val == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
-
if ( p->state != STATE_IOREQ_NONE )
+ {
gdprintk(XENLOG_WARNING, "WARNING: io already pending (%d)?\n",
p->state);
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ curr->arch.hvm_vcpu.io_state =
+ (val == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion;
p->dir = dir;
p->data_is_ptr = value_is_ptr;
p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO;
p->size = size;
p->addr = addr;
- p->count = count;
+ p->count = *reps;
p->df = df;
p->data = value;
p->io_count++;
- if ( is_mmio
- ? (hvm_mmio_intercept(p) || hvm_buffered_io_intercept(p))
- : hvm_portio_intercept(p) )
+ if ( is_mmio )
{
+ rc = hvm_mmio_intercept(p);
+ if ( rc == X86EMUL_UNHANDLEABLE )
+ rc = hvm_buffered_io_intercept(p);
+ }
+ else
+ {
+ rc = hvm_portio_intercept(p);
+ }
+
+ switch ( rc )
+ {
+ case X86EMUL_OKAY:
+ case X86EMUL_RETRY:
+ *reps = p->count;
p->state = STATE_IORESP_READY;
hvm_io_assist();
if ( val != NULL )
*val = curr->arch.hvm_vcpu.io_data;
curr->arch.hvm_vcpu.io_state = HVMIO_none;
- return X86EMUL_OKAY;
+ break;
+ case X86EMUL_UNHANDLEABLE:
+ hvm_send_assist_req(curr);
+ rc = (val != NULL) ? X86EMUL_RETRY : X86EMUL_OKAY;
+ break;
+ default:
+ BUG();
}
- hvm_send_assist_req(curr);
- return (val != NULL) ? X86EMUL_RETRY : X86EMUL_OKAY;
+ return rc;
}
static int hvmemul_do_pio(
- unsigned long port, unsigned long count, int size,
+ unsigned long port, unsigned long *reps, int size,
paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
{
- return hvmemul_do_io(0, port, count, size, value,
+ return hvmemul_do_io(0, port, reps, size, value,
dir, df, value_is_ptr, val);
}
static int hvmemul_do_mmio(
- paddr_t gpa, unsigned long count, int size,
+ paddr_t gpa, unsigned long *reps, int size,
paddr_t value, int dir, int df, int value_is_ptr, unsigned long *val)
{
- return hvmemul_do_io(1, gpa, count, size, value,
+ return hvmemul_do_io(1, gpa, reps, size, value,
dir, df, value_is_ptr, val);
}
@@ -206,7 +227,7 @@ static int __hvmemul_read(
struct hvm_emulate_ctxt *hvmemul_ctxt)
{
struct vcpu *curr = current;
- unsigned long addr;
+ unsigned long addr, reps = 1;
uint32_t pfec = PFEC_page_present;
paddr_t gpa;
int rc;
@@ -226,7 +247,8 @@ static int __hvmemul_read(
return X86EMUL_UNHANDLEABLE;
gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
if ( (off + bytes) <= PAGE_SIZE )
- return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ return hvmemul_do_mmio(gpa, &reps, bytes, 0,
+ IOREQ_READ, 0, 0, val);
}
if ( (seg != x86_seg_none) &&
@@ -251,7 +273,7 @@ static int __hvmemul_read(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_mmio(gpa, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, 0, val);
}
return X86EMUL_OKAY;
@@ -302,7 +324,7 @@ static int hvmemul_write(
struct hvm_emulate_ctxt *hvmemul_ctxt =
container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
struct vcpu *curr = current;
- unsigned long addr;
+ unsigned long addr, reps = 1;
uint32_t pfec = PFEC_page_present | PFEC_write_access;
paddr_t gpa;
int rc;
@@ -318,8 +340,8 @@ static int hvmemul_write(
unsigned int off = addr & (PAGE_SIZE - 1);
gpa = (((paddr_t)curr->arch.hvm_vcpu.mmio_gpfn << PAGE_SHIFT) | off);
if ( (off + bytes) <= PAGE_SIZE )
- return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE,
- 0, 0, NULL);
+ return hvmemul_do_mmio(gpa, &reps, bytes, val,
+ IOREQ_WRITE, 0, 0, NULL);
}
if ( (seg != x86_seg_none) &&
@@ -339,7 +361,8 @@ static int hvmemul_write(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_mmio(gpa, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL);
+ return hvmemul_do_mmio(gpa, &reps, bytes, val,
+ IOREQ_WRITE, 0, 0, NULL);
}
return X86EMUL_OKAY;
@@ -386,7 +409,7 @@ static int hvmemul_rep_ins(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_pio(src_port, *reps, bytes_per_rep, gpa, IOREQ_READ,
+ return hvmemul_do_pio(src_port, reps, bytes_per_rep, gpa, IOREQ_READ,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
@@ -419,7 +442,7 @@ static int hvmemul_rep_outs(
if ( rc != X86EMUL_OKAY )
return rc;
- return hvmemul_do_pio(dst_port, *reps, bytes_per_rep, gpa, IOREQ_WRITE,
+ return hvmemul_do_pio(dst_port, reps, bytes_per_rep, gpa, IOREQ_WRITE,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
@@ -469,14 +492,14 @@ static int hvmemul_rep_movs(
(void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
if ( !p2m_is_ram(p2mt) )
return hvmemul_do_mmio(
- sgpa, *reps, bytes_per_rep, dgpa, IOREQ_READ,
+ sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
(void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
if ( p2m_is_ram(p2mt) )
return X86EMUL_UNHANDLEABLE;
return hvmemul_do_mmio(
- dgpa, *reps, bytes_per_rep, sgpa, IOREQ_WRITE,
+ dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE,
!!(ctxt->regs->eflags & X86_EFLAGS_DF), 1, NULL);
}
@@ -513,7 +536,8 @@ static int hvmemul_read_io(
unsigned long *val,
struct x86_emulate_ctxt *ctxt)
{
- return hvmemul_do_pio(port, 1, bytes, 0, IOREQ_READ, 0, 0, val);
+ unsigned long reps = 1;
+ return hvmemul_do_pio(port, &reps, bytes, 0, IOREQ_READ, 0, 0, val);
}
static int hvmemul_write_io(
@@ -522,7 +546,8 @@ static int hvmemul_write_io(
unsigned long val,
struct x86_emulate_ctxt *ctxt)
{
- return hvmemul_do_pio(port, 1, bytes, val, IOREQ_WRITE, 0, 0, NULL);
+ unsigned long reps = 1;
+ return hvmemul_do_pio(port, &reps, bytes, val, IOREQ_WRITE, 0, 0, NULL);
}
static int hvmemul_read_cr(
diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c
index 49ca998d37..03dfbf3bd8 100644
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -150,8 +150,9 @@ static inline uint64_t hpet_read_maincounter(HPETState *h)
return h->hpet.mc64;
}
-static unsigned long hpet_read(
- struct vcpu *v, unsigned long addr, unsigned long length)
+static int hpet_read(
+ struct vcpu *v, unsigned long addr, unsigned long length,
+ unsigned long *pval)
{
HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
unsigned long result;
@@ -160,7 +161,10 @@ static unsigned long hpet_read(
addr &= HPET_MMAP_SIZE-1;
if ( hpet_check_access_length(addr, length) != 0 )
- return ~0UL;
+ {
+ result = ~0ul;
+ goto out;
+ }
spin_lock(&h->lock);
@@ -174,7 +178,9 @@ static unsigned long hpet_read(
spin_unlock(&h->lock);
- return result;
+ out:
+ *pval = result;
+ return X86EMUL_OKAY;
}
static void hpet_stop_timer(HPETState *h, unsigned int tn)
@@ -234,7 +240,7 @@ static inline uint64_t hpet_fixup_reg(
return new;
}
-static void hpet_write(
+static int hpet_write(
struct vcpu *v, unsigned long addr,
unsigned long length, unsigned long val)
{
@@ -245,7 +251,7 @@ static void hpet_write(
addr &= HPET_MMAP_SIZE-1;
if ( hpet_check_access_length(addr, length) != 0 )
- return;
+ goto out;
spin_lock(&h->lock);
@@ -349,6 +355,9 @@ static void hpet_write(
}
spin_unlock(&h->lock);
+
+ out:
+ return X86EMUL_OKAY;
}
static int hpet_range(struct vcpu *v, unsigned long addr)
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 961bfbf354..97a1aaa17c 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -181,7 +181,8 @@ void hvm_do_resume(struct vcpu *v)
break;
default:
gdprintk(XENLOG_ERR, "Weird HVM iorequest state %d.\n", p->state);
- domain_crash_synchronous();
+ domain_crash(v->domain);
+ return; /* bail */
}
}
}
@@ -276,7 +277,7 @@ static int hvm_print_line(
}
spin_unlock(&hd->pbuf_lock);
- return 1;
+ return X86EMUL_OKAY;
}
int hvm_domain_initialise(struct domain *d)
@@ -478,11 +479,11 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
vc = &v->arch.guest_context;
/* Need to init this vcpu before loading its contents */
- LOCK_BIGLOCK(d);
+ domain_lock(d);
if ( !v->is_initialised )
if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
return rc;
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
return -EINVAL;
@@ -687,47 +688,26 @@ void hvm_vcpu_destroy(struct vcpu *v)
/*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
}
-
-void hvm_vcpu_reset(struct vcpu *v)
-{
- vcpu_pause(v);
-
- vlapic_reset(vcpu_vlapic(v));
-
- hvm_funcs.vcpu_initialise(v);
-
- set_bit(_VPF_down, &v->pause_flags);
- clear_bit(_VPF_blocked, &v->pause_flags);
- v->fpu_initialised = 0;
- v->fpu_dirtied = 0;
- v->is_initialised = 0;
-
- vcpu_unpause(v);
-}
-
-static void hvm_vcpu_down(void)
+void hvm_vcpu_down(struct vcpu *v)
{
- struct vcpu *v = current;
struct domain *d = v->domain;
int online_count = 0;
- gdprintk(XENLOG_INFO, "VCPU%d: going offline.\n", v->vcpu_id);
-
/* Doesn't halt us immediately, but we'll never return to guest context. */
set_bit(_VPF_down, &v->pause_flags);
vcpu_sleep_nosync(v);
/* Any other VCPUs online? ... */
- LOCK_BIGLOCK(d);
+ domain_lock(d);
for_each_vcpu ( d, v )
if ( !test_bit(_VPF_down, &v->pause_flags) )
online_count++;
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
/* ... Shut down the domain if not. */
if ( online_count == 0 )
{
- gdprintk(XENLOG_INFO, "all CPUs offline -- powering off.\n");
+ gdprintk(XENLOG_INFO, "All CPUs offline -- powering off.\n");
domain_shutdown(d, SHUTDOWN_poweroff);
}
}
@@ -742,9 +722,10 @@ void hvm_send_assist_req(struct vcpu *v)
p = &get_ioreq(v)->vp_ioreq;
if ( unlikely(p->state != STATE_IOREQ_NONE) )
{
- /* This indicates a bug in the device model. Crash the domain. */
+ /* This indicates a bug in the device model. Crash the domain. */
gdprintk(XENLOG_ERR, "Device model set bad IO state %d.\n", p->state);
- domain_crash_synchronous();
+ domain_crash(v->domain);
+ return;
}
prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port);
@@ -765,7 +746,7 @@ void hvm_hlt(unsigned long rflags)
* out of this.
*/
if ( unlikely(!(rflags & X86_EFLAGS_IF)) )
- return hvm_vcpu_down();
+ return hvm_vcpu_down(current);
do_sched_op_compat(SCHEDOP_block, 0);
}
@@ -1894,79 +1875,6 @@ void hvm_hypercall_page_initialise(struct domain *d,
hvm_funcs.init_hypercall_page(d, hypercall_page);
}
-int hvm_bringup_ap(int vcpuid, int trampoline_vector)
-{
- struct domain *d = current->domain;
- struct vcpu *v;
- struct vcpu_guest_context *ctxt;
- struct segment_register reg;
-
- ASSERT(is_hvm_domain(d));
-
- if ( (v = d->vcpu[vcpuid]) == NULL )
- return -ENOENT;
-
- v->fpu_initialised = 0;
- v->arch.flags |= TF_kernel_mode;
- v->is_initialised = 1;
-
- ctxt = &v->arch.guest_context;
- memset(ctxt, 0, sizeof(*ctxt));
- ctxt->flags = VGCF_online;
- ctxt->user_regs.eflags = 2;
-
- v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
- hvm_update_guest_cr(v, 0);
-
- v->arch.hvm_vcpu.guest_cr[2] = 0;
- hvm_update_guest_cr(v, 2);
-
- v->arch.hvm_vcpu.guest_cr[3] = 0;
- hvm_update_guest_cr(v, 3);
-
- v->arch.hvm_vcpu.guest_cr[4] = 0;
- hvm_update_guest_cr(v, 4);
-
- v->arch.hvm_vcpu.guest_efer = 0;
- hvm_update_guest_efer(v);
-
- reg.sel = trampoline_vector << 8;
- reg.base = (uint32_t)reg.sel << 4;
- reg.limit = 0xffff;
- reg.attr.bytes = 0x89b;
- hvm_set_segment_register(v, x86_seg_cs, &reg);
-
- reg.sel = reg.base = 0;
- reg.limit = 0xffff;
- reg.attr.bytes = 0x893;
- hvm_set_segment_register(v, x86_seg_ds, &reg);
- hvm_set_segment_register(v, x86_seg_es, &reg);
- hvm_set_segment_register(v, x86_seg_fs, &reg);
- hvm_set_segment_register(v, x86_seg_gs, &reg);
- hvm_set_segment_register(v, x86_seg_ss, &reg);
-
- reg.attr.bytes = 0x82; /* LDT */
- hvm_set_segment_register(v, x86_seg_ldtr, &reg);
-
- reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
- hvm_set_segment_register(v, x86_seg_tr, &reg);
-
- reg.attr.bytes = 0;
- hvm_set_segment_register(v, x86_seg_gdtr, &reg);
- hvm_set_segment_register(v, x86_seg_idtr, &reg);
-
- /* Sync AP's TSC with BSP's. */
- v->arch.hvm_vcpu.cache_tsc_offset =
- v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
- hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
-
- if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
- vcpu_wake(v);
-
- gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid);
- return 0;
-}
-
static int hvmop_set_pci_intx_level(
XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t) uop)
{
@@ -2185,13 +2093,16 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
if ( op == HVMOP_set_param )
{
+ rc = 0;
+
switch ( a.index )
{
case HVM_PARAM_IOREQ_PFN:
iorp = &d->arch.hvm_domain.ioreq;
- rc = hvm_set_ioreq_page(d, iorp, a.value);
+ if ( (rc = hvm_set_ioreq_page(d, iorp, a.value)) != 0 )
+ break;
spin_lock(&iorp->lock);
- if ( (rc == 0) && (iorp->va != NULL) )
+ if ( iorp->va != NULL )
/* Initialise evtchn port info if VCPUs already created. */
for_each_vcpu ( d, v )
get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
@@ -2206,13 +2117,72 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
hvm_latch_shinfo_size(d);
break;
case HVM_PARAM_TIMER_MODE:
- rc = -EINVAL;
if ( a.value > HVMPTM_one_missed_tick_pending )
- goto param_fail;
+ rc = -EINVAL;
+ break;
+ case HVM_PARAM_IDENT_PT:
+ rc = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+
+ rc = -EINVAL;
+ if ( d->arch.hvm_domain.params[a.index] != 0 )
+ break;
+
+ rc = 0;
+ if ( !paging_mode_hap(d) )
+ break;
+
+ domain_pause(d);
+
+ /*
+ * Update GUEST_CR3 in each VMCS to point at identity map.
+ * All foreign updates to guest state must synchronise on
+ * the domctl_lock.
+ */
+ spin_lock(&domctl_lock);
+ d->arch.hvm_domain.params[a.index] = a.value;
+ for_each_vcpu ( d, v )
+ paging_update_cr3(v);
+ spin_unlock(&domctl_lock);
+
+ domain_unpause(d);
+ break;
+ case HVM_PARAM_DM_DOMAIN:
+ /* Privileged domains only, as we must domain_pause(d). */
+ rc = -EPERM;
+ if ( !IS_PRIV_FOR(current->domain, d) )
+ break;
+
+ if ( a.value == DOMID_SELF )
+ a.value = current->domain->domain_id;
+
+ rc = 0;
+ domain_pause(d); /* safe to change per-vcpu xen_port */
+ iorp = &d->arch.hvm_domain.ioreq;
+ for_each_vcpu ( d, v )
+ {
+ int old_port, new_port;
+ new_port = alloc_unbound_xen_event_channel(v, a.value);
+ if ( new_port < 0 )
+ {
+ rc = new_port;
+ break;
+ }
+ /* xchg() ensures that only we free_xen_event_channel() */
+ old_port = xchg(&v->arch.hvm_vcpu.xen_port, new_port);
+ free_xen_event_channel(v, old_port);
+ spin_lock(&iorp->lock);
+ if ( iorp->va != NULL )
+ get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port;
+ spin_unlock(&iorp->lock);
+ }
+ domain_unpause(d);
break;
}
- d->arch.hvm_domain.params[a.index] = a.value;
- rc = 0;
+
+ if ( rc == 0 )
+ d->arch.hvm_domain.params[a.index] = a.value;
}
else
{
diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c
index 01c78f7799..493b7317b9 100644
--- a/xen/arch/x86/hvm/i8254.c
+++ b/xen/arch/x86/hvm/i8254.c
@@ -401,50 +401,6 @@ void pit_stop_channel0_irq(PITState *pit)
spin_unlock(&pit->lock);
}
-#ifdef HVM_DEBUG_SUSPEND
-static void pit_info(PITState *pit)
-{
- struct hvm_hw_pit_channel *s;
- struct periodic_time *pt;
- int i;
-
- for ( i = 0; i < 3; i++ )
- {
- printk("*****pit channel %d's state:*****\n", i);
- s = &pit->hw.channels[i];
- printk("pit 0x%x.\n", s->count);
- printk("pit 0x%x.\n", s->latched_count);
- printk("pit 0x%x.\n", s->count_latched);
- printk("pit 0x%x.\n", s->status_latched);
- printk("pit 0x%x.\n", s->status);
- printk("pit 0x%x.\n", s->read_state);
- printk("pit 0x%x.\n", s->write_state);
- printk("pit 0x%x.\n", s->write_latch);
- printk("pit 0x%x.\n", s->rw_mode);
- printk("pit 0x%x.\n", s->mode);
- printk("pit 0x%x.\n", s->bcd);
- printk("pit 0x%x.\n", s->gate);
- printk("pit %"PRId64"\n", pit->count_load_time[i]);
-
- }
-
- pt = &pit->pt0;
- printk("pit channel 0 periodic timer:\n", i);
- printk("pt %d.\n", pt->enabled);
- printk("pt %d.\n", pt->one_shot);
- printk("pt %d.\n", pt->irq);
- printk("pt %d.\n", pt->first_injected);
- printk("pt %d.\n", pt->pending_intr_nr);
- printk("pt %d.\n", pt->period);
- printk("pt %"PRId64"\n", pt->period_cycles);
- printk("pt %"PRId64"\n", pt->last_plt_gtime);
-}
-#else
-static void pit_info(PITState *pit)
-{
-}
-#endif
-
static int pit_save(struct domain *d, hvm_domain_context_t *h)
{
PITState *pit = domain_vpit(d);
@@ -452,9 +408,6 @@ static int pit_save(struct domain *d, hvm_domain_context_t *h)
spin_lock(&pit->lock);
- pit_info(pit);
-
- /* Save the PIT hardware state */
rc = hvm_save_entry(PIT, 0, h, &pit->hw);
spin_unlock(&pit->lock);
@@ -469,22 +422,21 @@ static int pit_load(struct domain *d, hvm_domain_context_t *h)
spin_lock(&pit->lock);
- /* Restore the PIT hardware state */
if ( hvm_load_entry(PIT, h, &pit->hw) )
{
spin_unlock(&pit->lock);
return 1;
}
- /* Recreate platform timers from hardware state. There will be some
+ /*
+ * Recreate platform timers from hardware state. There will be some
* time jitter here, but the wall-clock will have jumped massively, so
- * we hope the guest can handle it. */
+ * we hope the guest can handle it.
+ */
pit->pt0.last_plt_gtime = hvm_get_guest_time(d->vcpu[0]);
for ( i = 0; i < 3; i++ )
pit_load_count(pit, i, pit->hw.channels[i].count);
- pit_info(pit);
-
spin_unlock(&pit->lock);
return 0;
@@ -535,7 +487,7 @@ static int handle_pit_io(
if ( bytes != 1 )
{
gdprintk(XENLOG_WARNING, "PIT bad access\n");
- return 1;
+ return X86EMUL_OKAY;
}
if ( dir == IOREQ_WRITE )
@@ -550,7 +502,7 @@ static int handle_pit_io(
gdprintk(XENLOG_WARNING, "PIT: read A1:A0=3!\n");
}
- return 1;
+ return X86EMUL_OKAY;
}
static void speaker_ioport_write(
@@ -574,11 +526,7 @@ static int handle_speaker_io(
{
struct PITState *vpit = vcpu_vpit(current);
- if ( bytes != 1 )
- {
- gdprintk(XENLOG_WARNING, "PIT_SPEAKER bad access\n");
- return 1;
- }
+ BUG_ON(bytes != 1);
spin_lock(&vpit->lock);
@@ -589,7 +537,7 @@ static int handle_speaker_io(
spin_unlock(&vpit->lock);
- return 1;
+ return X86EMUL_OKAY;
}
int pv_pit_handler(int port, int data, int write)
diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c
index 04c5da7b6f..0e110e00dc 100644
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -45,53 +45,63 @@ static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
&vioapic_mmio_handler
};
-static inline void hvm_mmio_access(struct vcpu *v,
- ioreq_t *p,
- hvm_mmio_read_t read_handler,
- hvm_mmio_write_t write_handler)
+static int hvm_mmio_access(struct vcpu *v,
+ ioreq_t *p,
+ hvm_mmio_read_t read_handler,
+ hvm_mmio_write_t write_handler)
{
unsigned long data;
+ int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
- switch ( p->type )
+ if ( !p->data_is_ptr )
{
- case IOREQ_TYPE_COPY:
- if ( !p->data_is_ptr ) {
- if ( p->dir == IOREQ_READ )
- p->data = read_handler(v, p->addr, p->size);
- else /* p->dir == IOREQ_WRITE */
- write_handler(v, p->addr, p->size, p->data);
- } else { /* p->data_is_ptr */
- int i, sign = (p->df) ? -1 : 1;
-
- if ( p->dir == IOREQ_READ ) {
- for ( i = 0; i < p->count; i++ ) {
- data = read_handler(v,
- p->addr + (sign * i * p->size),
- p->size);
- (void)hvm_copy_to_guest_phys(
- p->data + (sign * i * p->size),
- &data,
- p->size);
- }
- } else {/* p->dir == IOREQ_WRITE */
- for ( i = 0; i < p->count; i++ ) {
- (void)hvm_copy_from_guest_phys(
- &data,
- p->data + (sign * i * p->size),
- p->size);
- write_handler(v,
- p->addr + (sign * i * p->size),
- p->size, data);
- }
- }
+ if ( p->dir == IOREQ_READ )
+ {
+ rc = read_handler(v, p->addr, p->size, &data);
+ p->data = data;
}
- break;
+ else /* p->dir == IOREQ_WRITE */
+ rc = write_handler(v, p->addr, p->size, p->data);
+ return rc;
+ }
- default:
- printk("hvm_mmio_access: error ioreq type %x\n", p->type);
- domain_crash_synchronous();
- break;
+ if ( p->dir == IOREQ_READ )
+ {
+ for ( i = 0; i < p->count; i++ )
+ {
+ rc = read_handler(
+ v,
+ p->addr + (sign * i * p->size),
+ p->size, &data);
+ if ( rc != X86EMUL_OKAY )
+ break;
+ (void)hvm_copy_to_guest_phys(
+ p->data + (sign * i * p->size),
+ &data,
+ p->size);
+ }
+ }
+ else
+ {
+ for ( i = 0; i < p->count; i++ )
+ {
+ (void)hvm_copy_from_guest_phys(
+ &data,
+ p->data + (sign * i * p->size),
+ p->size);
+ rc = write_handler(
+ v,
+ p->addr + (sign * i * p->size),
+ p->size, data);
+ if ( rc != X86EMUL_OKAY )
+ break;
+ }
}
+
+ if ( (p->count = i) != 0 )
+ rc = X86EMUL_OKAY;
+
+ return rc;
}
int hvm_mmio_intercept(ioreq_t *p)
@@ -100,60 +110,62 @@ int hvm_mmio_intercept(ioreq_t *p)
int i;
for ( i = 0; i < HVM_MMIO_HANDLER_NR; i++ )
- {
if ( hvm_mmio_handlers[i]->check_handler(v, p->addr) )
- {
- hvm_mmio_access(v, p,
- hvm_mmio_handlers[i]->read_handler,
- hvm_mmio_handlers[i]->write_handler);
- return 1;
- }
- }
+ return hvm_mmio_access(
+ v, p,
+ hvm_mmio_handlers[i]->read_handler,
+ hvm_mmio_handlers[i]->write_handler);
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
static int process_portio_intercept(portio_action_t action, ioreq_t *p)
{
- int rc = 1, i, sign = p->df ? -1 : 1;
+ int rc = X86EMUL_OKAY, i, sign = p->df ? -1 : 1;
uint32_t data;
- if ( p->dir == IOREQ_READ )
+ if ( !p->data_is_ptr )
{
- if ( !p->data_is_ptr )
+ if ( p->dir == IOREQ_READ )
{
rc = action(IOREQ_READ, p->addr, p->size, &data);
p->data = data;
}
else
{
- for ( i = 0; i < p->count; i++ )
- {
- rc = action(IOREQ_READ, p->addr, p->size, &data);
- (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
- &data, p->size);
- }
+ data = p->data;
+ rc = action(IOREQ_WRITE, p->addr, p->size, &data);
}
+ return rc;
}
- else /* p->dir == IOREQ_WRITE */
+
+ if ( p->dir == IOREQ_READ )
{
- if ( !p->data_is_ptr )
+ for ( i = 0; i < p->count; i++ )
{
- data = p->data;
- rc = action(IOREQ_WRITE, p->addr, p->size, &data);
+ rc = action(IOREQ_READ, p->addr, p->size, &data);
+ if ( rc != X86EMUL_OKAY )
+ break;
+ (void)hvm_copy_to_guest_phys(p->data + sign*i*p->size,
+ &data, p->size);
}
- else
+ }
+ else /* p->dir == IOREQ_WRITE */
+ {
+ for ( i = 0; i < p->count; i++ )
{
- for ( i = 0; i < p->count; i++ )
- {
- data = 0;
- (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size,
- p->size);
- rc = action(IOREQ_WRITE, p->addr, p->size, &data);
- }
+ data = 0;
+ (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size,
+ p->size);
+ rc = action(IOREQ_WRITE, p->addr, p->size, &data);
+ if ( rc != X86EMUL_OKAY )
+ break;
}
}
+ if ( (p->count = i) != 0 )
+ rc = X86EMUL_OKAY;
+
return rc;
}
@@ -170,7 +182,7 @@ int hvm_io_intercept(ioreq_t *p, int type)
unsigned long addr, size;
if ( (type == HVM_PORTIO) && (dpci_ioport_intercept(p)) )
- return 1;
+ return X86EMUL_OKAY;
for ( i = 0; i < handler->num_slot; i++ )
{
@@ -188,10 +200,10 @@ int hvm_io_intercept(ioreq_t *p, int type)
}
}
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
-int register_io_handler(
+void register_io_handler(
struct domain *d, unsigned long addr, unsigned long size,
void *action, int type)
{
@@ -207,9 +219,8 @@ int register_io_handler(
else
handler->hdl_list[num].action.mmio = action;
handler->num_slot++;
-
- return 1;
}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index ac1e62782a..6a8e0885c0 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -148,20 +148,19 @@ void send_timeoffset_req(unsigned long timeoff)
void send_invalidate_req(void)
{
struct vcpu *v = current;
- vcpu_iodata_t *vio;
+ vcpu_iodata_t *vio = get_ioreq(v);
ioreq_t *p;
- vio = get_ioreq(v);
- if ( vio == NULL )
- {
- printk("bad shared page: %lx\n", (unsigned long) vio);
- domain_crash_synchronous();
- }
+ BUG_ON(vio == NULL);
p = &vio->vp_ioreq;
if ( p->state != STATE_IOREQ_NONE )
- printk("WARNING: send invalidate req with something "
- "already pending (%d)?\n", p->state);
+ {
+ gdprintk(XENLOG_ERR, "WARNING: send invalidate req with something "
+ "already pending (%d)?\n", p->state);
+ domain_crash(v->domain);
+ return;
+ }
p->type = IOREQ_TYPE_INVALIDATE;
p->size = 4;
@@ -225,12 +224,6 @@ void hvm_io_assist(void)
ioreq_t *p = &get_ioreq(curr)->vp_ioreq;
enum hvm_io_state io_state;
- if ( p->state != STATE_IORESP_READY )
- {
- gdprintk(XENLOG_ERR, "Unexpected HVM iorequest state %d.\n", p->state);
- domain_crash_synchronous();
- }
-
rmb(); /* see IORESP_READY /then/ read contents of ioreq */
p->state = STATE_IOREQ_NONE;
@@ -253,74 +246,59 @@ void hvm_io_assist(void)
void dpci_ioport_read(uint32_t mport, ioreq_t *p)
{
- uint64_t i;
- uint64_t z_data;
- uint64_t length = (p->count * p->size);
+ int i, sign = p->df ? -1 : 1;
+ uint32_t data = 0;
- for ( i = 0; i < length; i += p->size )
+ for ( i = 0; i < p->count; i++ )
{
- z_data = ~0ULL;
-
switch ( p->size )
{
case 1:
- z_data = (uint64_t)inb(mport);
+ data = inb(mport);
break;
case 2:
- z_data = (uint64_t)inw(mport);
+ data = inw(mport);
break;
case 4:
- z_data = (uint64_t)inl(mport);
+ data = inl(mport);
break;
default:
- gdprintk(XENLOG_ERR, "Error: unable to handle size: %"
- PRId64 "\n", p->size);
- return;
+ BUG();
}
- p->data = z_data;
- if ( p->data_is_ptr &&
- hvm_copy_to_guest_phys(p->data + i, (void *)&z_data,
- (int)p->size) )
- {
- gdprintk(XENLOG_ERR, "Error: couldn't copy to hvm phys\n");
- return;
- }
+ if ( p->data_is_ptr )
+ (void)hvm_copy_to_guest_phys(
+ p->data + (sign * i * p->size), &data, p->size);
+ else
+ p->data = data;
}
}
void dpci_ioport_write(uint32_t mport, ioreq_t *p)
{
- uint64_t i;
- uint64_t z_data = 0;
- uint64_t length = (p->count * p->size);
+ int i, sign = p->df ? -1 : 1;
+ uint32_t data;
- for ( i = 0; i < length; i += p->size )
+ for ( i = 0; i < p->count; i++ )
{
- z_data = p->data;
- if ( p->data_is_ptr &&
- hvm_copy_from_guest_phys((void *)&z_data,
- p->data + i, (int)p->size) )
- {
- gdprintk(XENLOG_ERR, "Error: couldn't copy from hvm phys\n");
- return;
- }
+ data = p->data;
+ if ( p->data_is_ptr )
+ (void)hvm_copy_from_guest_phys(
+ &data, p->data + (sign * i * p->size), p->size);
switch ( p->size )
{
case 1:
- outb((uint8_t) z_data, mport);
+ outb(data, mport);
break;
case 2:
- outw((uint16_t) z_data, mport);
+ outw(data, mport);
break;
case 4:
- outl((uint32_t) z_data, mport);
+ outl(data, mport);
break;
default:
- gdprintk(XENLOG_ERR, "Error: unable to handle size: %"
- PRId64 "\n", p->size);
- break;
+ BUG();
}
}
}
diff --git a/xen/arch/x86/hvm/mtrr.c b/xen/arch/x86/hvm/mtrr.c
index 3bd0dc9d7c..4e50680022 100644
--- a/xen/arch/x86/hvm/mtrr.c
+++ b/xen/arch/x86/hvm/mtrr.c
@@ -266,7 +266,7 @@ static void setup_var_mtrrs(struct vcpu *v)
{
if ( e820_table[i].addr == 0x100000 )
{
- size = e820_table[i].size + 0x100000 + PAGE_SIZE * 4;
+ size = e820_table[i].size + 0x100000 + PAGE_SIZE * 5;
addr = 0;
}
else
diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c
index 8d3fff8f44..4924a80687 100644
--- a/xen/arch/x86/hvm/pmtimer.c
+++ b/xen/arch/x86/hvm/pmtimer.c
@@ -169,7 +169,7 @@ static int handle_evt_io(
spin_unlock(&s->lock);
- return 1;
+ return X86EMUL_OKAY;
}
@@ -183,7 +183,7 @@ static int handle_pmt_io(
if ( bytes != 4 )
{
gdprintk(XENLOG_WARNING, "HVM_PMT bad access\n");
- return 1;
+ return X86EMUL_OKAY;
}
if ( dir == IOREQ_READ )
@@ -192,10 +192,10 @@ static int handle_pmt_io(
pmt_update_time(s);
*val = s->pm.tmr_val;
spin_unlock(&s->lock);
- return 1;
+ return X86EMUL_OKAY;
}
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
static int pmtimer_save(struct domain *d, hvm_domain_context_t *h)
diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
index b9e4b4a241..e196c72866 100644
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -403,21 +403,21 @@ static int handle_rtc_io(
if ( bytes != 1 )
{
gdprintk(XENLOG_WARNING, "HVM_RTC bas access\n");
- return 1;
+ return X86EMUL_OKAY;
}
if ( dir == IOREQ_WRITE )
{
if ( rtc_ioport_write(vrtc, port, (uint8_t)*val) )
- return 1;
+ return X86EMUL_OKAY;
}
else if ( vrtc->hw.cmos_index < RTC_CMOS_SIZE )
{
*val = rtc_ioport_read(vrtc, port);
- return 1;
+ return X86EMUL_OKAY;
}
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
void rtc_migrate_timers(struct vcpu *v)
diff --git a/xen/arch/x86/hvm/stdvga.c b/xen/arch/x86/hvm/stdvga.c
index 56260c5c77..25b16bddac 100644
--- a/xen/arch/x86/hvm/stdvga.c
+++ b/xen/arch/x86/hvm/stdvga.c
@@ -32,6 +32,7 @@
#include <xen/sched.h>
#include <xen/domain_page.h>
#include <asm/hvm/support.h>
+#include <xen/numa.h>
#define PAT(x) (x)
static const uint32_t mask16[16] = {
@@ -166,19 +167,19 @@ static void stdvga_out(uint32_t port, uint32_t bytes, uint32_t val)
}
}
-int stdvga_intercept_pio(
+static int stdvga_intercept_pio(
int dir, uint32_t port, uint32_t bytes, uint32_t *val)
{
struct hvm_hw_stdvga *s = &current->domain->arch.hvm_domain.stdvga;
- if ( dir == IOREQ_READ )
- return 0;
-
- spin_lock(&s->lock);
- stdvga_out(port, bytes, *val);
- spin_unlock(&s->lock);
+ if ( dir == IOREQ_WRITE )
+ {
+ spin_lock(&s->lock);
+ stdvga_out(port, bytes, *val);
+ spin_unlock(&s->lock);
+ }
- return 0; /* propagate to external ioemu */
+ return X86EMUL_UNHANDLEABLE; /* propagate to external ioemu */
}
#define GET_PLANE(data, p) (((data) >> ((p) * 8)) & 0xff)
@@ -458,7 +459,7 @@ static int mmio_move(struct hvm_hw_stdvga *s, ioreq_t *p)
return 1;
}
-int stdvga_intercept_mmio(ioreq_t *p)
+static int stdvga_intercept_mmio(ioreq_t *p)
{
struct domain *d = current->domain;
struct hvm_hw_stdvga *s = &d->arch.hvm_domain.stdvga;
@@ -467,7 +468,7 @@ int stdvga_intercept_mmio(ioreq_t *p)
if ( p->size > 8 )
{
gdprintk(XENLOG_WARNING, "invalid mmio size %d\n", (int)p->size);
- return 0;
+ return X86EMUL_UNHANDLEABLE;
}
spin_lock(&s->lock);
@@ -498,7 +499,7 @@ int stdvga_intercept_mmio(ioreq_t *p)
spin_unlock(&s->lock);
- return rc;
+ return rc ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
}
void stdvga_init(struct domain *d)
@@ -513,7 +514,8 @@ void stdvga_init(struct domain *d)
for ( i = 0; i != ARRAY_SIZE(s->vram_page); i++ )
{
- if ( (pg = alloc_domheap_page(NULL)) == NULL )
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
+ if ( pg == NULL )
break;
s->vram_page[i] = pg;
p = map_domain_page(page_to_mfn(pg));
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index be166a868c..7c10127966 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -255,11 +255,6 @@ static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
svm_update_guest_cr(v, 2);
svm_update_guest_cr(v, 4);
-#ifdef HVM_DEBUG_SUSPEND
- printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__, c->cr3, c->cr0, c->cr4);
-#endif
-
vmcb->sysenter_cs = c->sysenter_cs;
vmcb->sysenter_esp = c->sysenter_esp;
vmcb->sysenter_eip = c->sysenter_eip;
@@ -472,7 +467,7 @@ static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- ASSERT(v == current);
+ ASSERT((v == current) || !vcpu_runnable(v));
switch ( seg )
{
diff --git a/xen/arch/x86/hvm/vioapic.c b/xen/arch/x86/hvm/vioapic.c
index c01618c69f..8ebaa260cf 100644
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -88,9 +88,9 @@ static unsigned long vioapic_read_indirect(struct hvm_hw_vioapic *vioapic,
return result;
}
-static unsigned long vioapic_read(struct vcpu *v,
- unsigned long addr,
- unsigned long length)
+static int vioapic_read(
+ struct vcpu *v, unsigned long addr,
+ unsigned long length, unsigned long *pval)
{
struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);
uint32_t result;
@@ -114,11 +114,13 @@ static unsigned long vioapic_read(struct vcpu *v,
break;
}
- return result;
+ *pval = result;
+ return X86EMUL_OKAY;
}
static void vioapic_write_redirent(
- struct hvm_hw_vioapic *vioapic, unsigned int idx, int top_word, uint32_t val)
+ struct hvm_hw_vioapic *vioapic, unsigned int idx,
+ int top_word, uint32_t val)
{
struct domain *d = vioapic_domain(vioapic);
struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
@@ -196,10 +198,9 @@ static void vioapic_write_indirect(
}
}
-static void vioapic_write(struct vcpu *v,
- unsigned long addr,
- unsigned long length,
- unsigned long val)
+static int vioapic_write(
+ struct vcpu *v, unsigned long addr,
+ unsigned long length, unsigned long val)
{
struct hvm_hw_vioapic *vioapic = domain_vioapic(v->domain);
@@ -224,6 +225,8 @@ static void vioapic_write(struct vcpu *v,
default:
break;
}
+
+ return X86EMUL_OKAY;
}
static int vioapic_range(struct vcpu *v, unsigned long addr)
@@ -477,45 +480,16 @@ void vioapic_update_EOI(struct domain *d, int vector)
spin_unlock(&d->arch.hvm_domain.irq_lock);
}
-#ifdef HVM_DEBUG_SUSPEND
-static void ioapic_info(struct hvm_hw_vioapic *s)
-{
- int i;
- printk("*****ioapic state:*****\n");
- printk("ioapic 0x%x.\n", s->ioregsel);
- printk("ioapic 0x%x.\n", s->id);
- printk("ioapic 0x%lx.\n", s->base_address);
- for (i = 0; i < VIOAPIC_NUM_PINS; i++) {
- printk("ioapic redirtbl[%d]:0x%"PRIx64"\n", i, s->redirtbl[i].bits);
- }
-
-}
-#else
-static void ioapic_info(struct hvm_hw_vioapic *s)
-{
-}
-#endif
-
-
static int ioapic_save(struct domain *d, hvm_domain_context_t *h)
{
struct hvm_hw_vioapic *s = domain_vioapic(d);
- ioapic_info(s);
-
- /* save io-apic state*/
- return ( hvm_save_entry(IOAPIC, 0, h, s) );
+ return hvm_save_entry(IOAPIC, 0, h, s);
}
static int ioapic_load(struct domain *d, hvm_domain_context_t *h)
{
struct hvm_hw_vioapic *s = domain_vioapic(d);
-
- /* restore ioapic state */
- if ( hvm_load_entry(IOAPIC, h, s) != 0 )
- return -EINVAL;
-
- ioapic_info(s);
- return 0;
+ return hvm_load_entry(IOAPIC, h, s);
}
HVM_REGISTER_SAVE_RESTORE(IOAPIC, ioapic_save, ioapic_load, 1, HVMSR_PER_DOM);
diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c
index bf53ba7a1a..9bfc2cc3d1 100644
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -33,6 +33,7 @@
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/hvm/vmx/vmx.h>
+#include <xen/numa.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>
@@ -240,12 +241,145 @@ static int vlapic_match_dest(struct vcpu *v, struct vlapic *source,
return result;
}
+static int vlapic_vcpu_pause_async(struct vcpu *v)
+{
+ vcpu_pause_nosync(v);
+
+ if ( v->is_running )
+ {
+ vcpu_unpause(v);
+ return 0;
+ }
+
+ sync_vcpu_execstate(v);
+ return 1;
+}
+
+static void vlapic_init_action(unsigned long _vcpu)
+{
+ struct vcpu *v = (struct vcpu *)_vcpu;
+ struct domain *d = v->domain;
+
+ /* If the VCPU is not on its way down we have nothing to do. */
+ if ( !test_bit(_VPF_down, &v->pause_flags) )
+ return;
+
+ if ( !vlapic_vcpu_pause_async(v) )
+ {
+ tasklet_schedule(&vcpu_vlapic(v)->init_tasklet);
+ return;
+ }
+
+ domain_lock(d);
+
+ /* Paranoia makes us re-assert VPF_down under the domain lock. */
+ set_bit(_VPF_down, &v->pause_flags);
+ v->is_initialised = 0;
+ clear_bit(_VPF_blocked, &v->pause_flags);
+
+ vlapic_reset(vcpu_vlapic(v));
+
+ domain_unlock(d);
+
+ vcpu_unpause(v);
+}
+
+static int vlapic_accept_init(struct vcpu *v)
+{
+ /* Nothing to do if the VCPU is already reset. */
+ if ( !v->is_initialised )
+ return X86EMUL_OKAY;
+
+ /* Asynchronously take the VCPU down and schedule reset work. */
+ hvm_vcpu_down(v);
+ tasklet_schedule(&vcpu_vlapic(v)->init_tasklet);
+ return X86EMUL_RETRY;
+}
+
+static int vlapic_accept_sipi(struct vcpu *v, int trampoline_vector)
+{
+ struct domain *d = current->domain;
+ struct vcpu_guest_context *ctxt;
+ struct segment_register reg;
+
+ /* If the VCPU is not on its way down we have nothing to do. */
+ if ( !test_bit(_VPF_down, &v->pause_flags) )
+ return X86EMUL_OKAY;
+
+ if ( !vlapic_vcpu_pause_async(v) )
+ return X86EMUL_RETRY;
+
+ domain_lock(d);
+
+ if ( v->is_initialised )
+ goto out;
+
+ ctxt = &v->arch.guest_context;
+ memset(ctxt, 0, sizeof(*ctxt));
+ ctxt->flags = VGCF_online;
+ ctxt->user_regs.eflags = 2;
+
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
+ hvm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[2] = 0;
+ hvm_update_guest_cr(v, 2);
+
+ v->arch.hvm_vcpu.guest_cr[3] = 0;
+ hvm_update_guest_cr(v, 3);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);
+
+ v->arch.hvm_vcpu.guest_efer = 0;
+ hvm_update_guest_efer(v);
+
+ reg.sel = trampoline_vector << 8;
+ reg.base = (uint32_t)reg.sel << 4;
+ reg.limit = 0xffff;
+ reg.attr.bytes = 0x89b;
+ hvm_set_segment_register(v, x86_seg_cs, &reg);
+
+ reg.sel = reg.base = 0;
+ reg.limit = 0xffff;
+ reg.attr.bytes = 0x893;
+ hvm_set_segment_register(v, x86_seg_ds, &reg);
+ hvm_set_segment_register(v, x86_seg_es, &reg);
+ hvm_set_segment_register(v, x86_seg_fs, &reg);
+ hvm_set_segment_register(v, x86_seg_gs, &reg);
+ hvm_set_segment_register(v, x86_seg_ss, &reg);
+
+ reg.attr.bytes = 0x82; /* LDT */
+ hvm_set_segment_register(v, x86_seg_ldtr, &reg);
+
+ reg.attr.bytes = 0x8b; /* 32-bit TSS (busy) */
+ hvm_set_segment_register(v, x86_seg_tr, &reg);
+
+ reg.attr.bytes = 0;
+ hvm_set_segment_register(v, x86_seg_gdtr, &reg);
+ hvm_set_segment_register(v, x86_seg_idtr, &reg);
+
+ /* Sync AP's TSC with BSP's. */
+ v->arch.hvm_vcpu.cache_tsc_offset =
+ v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
+ hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
+
+ v->arch.flags |= TF_kernel_mode;
+ v->is_initialised = 1;
+ clear_bit(_VPF_down, &v->pause_flags);
+
+ out:
+ domain_unlock(d);
+ vcpu_unpause(v);
+ return X86EMUL_OKAY;
+}
+
/* Add a pending IRQ into lapic. */
static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
int vector, int level, int trig_mode)
{
- int result = 0;
struct vlapic *vlapic = vcpu_vlapic(v);
+ int rc = X86EMUL_OKAY;
switch ( delivery_mode )
{
@@ -270,8 +404,6 @@ static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
}
vcpu_kick(v);
-
- result = 1;
break;
case APIC_DM_REMRD:
@@ -291,43 +423,20 @@ static int vlapic_accept_irq(struct vcpu *v, int delivery_mode,
/* No work on INIT de-assert for P4-type APIC. */
if ( trig_mode && !(level & APIC_INT_ASSERT) )
break;
- /* FIXME How to check the situation after vcpu reset? */
- if ( v->is_initialised )
- hvm_vcpu_reset(v);
- v->arch.hvm_vcpu.init_sipi_sipi_state =
- HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI;
- result = 1;
+ rc = vlapic_accept_init(v);
break;
case APIC_DM_STARTUP:
- if ( v->arch.hvm_vcpu.init_sipi_sipi_state ==
- HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM )
- break;
-
- v->arch.hvm_vcpu.init_sipi_sipi_state =
- HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM;
-
- if ( v->is_initialised )
- {
- gdprintk(XENLOG_ERR, "SIPI for initialized vcpu %x\n", v->vcpu_id);
- goto exit_and_crash;
- }
-
- if ( hvm_bringup_ap(v->vcpu_id, vector) != 0 )
- result = 0;
+ rc = vlapic_accept_sipi(v, vector);
break;
default:
gdprintk(XENLOG_ERR, "TODO: unsupported delivery mode %x\n",
delivery_mode);
- goto exit_and_crash;
+ domain_crash(v->domain);
}
- return result;
-
- exit_and_crash:
- domain_crash(v->domain);
- return 0;
+ return rc;
}
/* This function is used by both ioapic and lapic.The bitmap is for vcpu_id. */
@@ -369,11 +478,9 @@ void vlapic_EOI_set(struct vlapic *vlapic)
vioapic_update_EOI(vlapic_domain(vlapic), vector);
}
-static void vlapic_ipi(struct vlapic *vlapic)
+static int vlapic_ipi(
+ struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
{
- uint32_t icr_low = vlapic_get_reg(vlapic, APIC_ICR);
- uint32_t icr_high = vlapic_get_reg(vlapic, APIC_ICR2);
-
unsigned int dest = GET_APIC_DEST_FIELD(icr_high);
unsigned int short_hand = icr_low & APIC_SHORT_MASK;
unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG;
@@ -385,6 +492,7 @@ static void vlapic_ipi(struct vlapic *vlapic)
struct vlapic *target;
struct vcpu *v;
uint32_t lpr_map = 0;
+ int rc = X86EMUL_OKAY;
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "icr_high 0x%x, icr_low 0x%x, "
"short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
@@ -399,18 +507,23 @@ static void vlapic_ipi(struct vlapic *vlapic)
if ( delivery_mode == APIC_DM_LOWEST )
__set_bit(v->vcpu_id, &lpr_map);
else
- vlapic_accept_irq(v, delivery_mode,
- vector, level, trig_mode);
+ rc = vlapic_accept_irq(v, delivery_mode,
+ vector, level, trig_mode);
}
+
+ if ( rc != X86EMUL_OKAY )
+ break;
}
if ( delivery_mode == APIC_DM_LOWEST )
{
target = apic_round_robin(vlapic_domain(v), vector, lpr_map);
if ( target != NULL )
- vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
- vector, level, trig_mode);
+ rc = vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
+ vector, level, trig_mode);
}
+
+ return rc;
}
static uint32_t vlapic_get_tmcct(struct vlapic *vlapic)
@@ -465,17 +578,18 @@ static void vlapic_read_aligned(
}
}
-static unsigned long vlapic_read(struct vcpu *v, unsigned long address,
- unsigned long len)
+static int vlapic_read(
+ struct vcpu *v, unsigned long address,
+ unsigned long len, unsigned long *pval)
{
unsigned int alignment;
unsigned int tmp;
- unsigned long result;
+ unsigned long result = 0;
struct vlapic *vlapic = vcpu_vlapic(v);
unsigned int offset = address - vlapic_base_address(vlapic);
if ( offset > (APIC_TDCR + 0x3) )
- return 0;
+ goto out;
alignment = offset & 0x3;
@@ -507,14 +621,16 @@ static unsigned long vlapic_read(struct vcpu *v, unsigned long address,
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "offset 0x%x with length 0x%lx, "
"and the result is 0x%lx", offset, len, result);
- return result;
+ out:
+ *pval = result;
+ return X86EMUL_OKAY;
unaligned_exit_and_crash:
gdprintk(XENLOG_ERR, "Unaligned LAPIC read len=0x%lx at offset=0x%x.\n",
len, offset);
exit_and_crash:
domain_crash(v->domain);
- return 0;
+ return X86EMUL_OKAY;
}
void vlapic_pt_cb(struct vcpu *v, void *data)
@@ -522,11 +638,12 @@ void vlapic_pt_cb(struct vcpu *v, void *data)
*(s_time_t *)data = hvm_get_guest_time(v);
}
-static void vlapic_write(struct vcpu *v, unsigned long address,
- unsigned long len, unsigned long val)
+static int vlapic_write(struct vcpu *v, unsigned long address,
+ unsigned long len, unsigned long val)
{
struct vlapic *vlapic = vcpu_vlapic(v);
unsigned int offset = address - vlapic_base_address(vlapic);
+ int rc = X86EMUL_OKAY;
if ( offset != 0xb0 )
HVM_DBG_LOG(DBG_LEVEL_VLAPIC,
@@ -540,13 +657,13 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
val = (uint32_t)val;
if ( len != 4 )
{
- unsigned int tmp;
+ unsigned long tmp;
unsigned char alignment;
gdprintk(XENLOG_INFO, "Notice: Local APIC write with len = %lx\n",len);
alignment = offset & 0x3;
- tmp = vlapic_read(v, offset & ~0x3, 4);
+ (void)vlapic_read(v, offset & ~0x3, 4, &tmp);
switch ( len )
{
@@ -617,9 +734,10 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
break;
case APIC_ICR:
- /* No delay here, so we always clear the pending bit*/
- vlapic_set_reg(vlapic, APIC_ICR, val & ~(1 << 12));
- vlapic_ipi(vlapic);
+ val &= ~(1 << 12); /* always clear the pending bit */
+ rc = vlapic_ipi(vlapic, val, vlapic_get_reg(vlapic, APIC_ICR2));
+ if ( rc == X86EMUL_OKAY )
+ vlapic_set_reg(vlapic, APIC_ICR, val);
break;
case APIC_ICR2:
@@ -669,13 +787,14 @@ static void vlapic_write(struct vcpu *v, unsigned long address,
break;
}
- return;
+ return rc;
unaligned_exit_and_crash:
gdprintk(XENLOG_ERR, "Unaligned LAPIC write len=0x%lx at offset=0x%x.\n",
len, offset);
exit_and_crash:
domain_crash(v->domain);
+ return rc;
}
static int vlapic_range(struct vcpu *v, unsigned long addr)
@@ -788,77 +907,58 @@ void vlapic_reset(struct vlapic *vlapic)
vlapic_set_reg(vlapic, APIC_SPIV, 0xff);
vlapic->hw.disabled |= VLAPIC_SW_DISABLED;
-}
-#ifdef HVM_DEBUG_SUSPEND
-static void lapic_info(struct vlapic *s)
-{
- printk("*****lapic state:*****\n");
- printk("lapic 0x%"PRIx64".\n", s->hw.apic_base_msr);
- printk("lapic 0x%x.\n", s->hw.disabled);
- printk("lapic 0x%x.\n", s->hw.timer_divisor);
-}
-#else
-static void lapic_info(struct vlapic *s)
-{
+ destroy_periodic_time(&vlapic->pt);
}
-#endif
/* rearm the actimer if needed, after an HVM restore */
static void lapic_rearm(struct vlapic *s)
{
- unsigned long tmict;
+ unsigned long tmict = vlapic_get_reg(s, APIC_TMICT);
+ uint64_t period;
- tmict = vlapic_get_reg(s, APIC_TMICT);
- if ( tmict > 0 )
- {
- uint64_t period = (uint64_t)APIC_BUS_CYCLE_NS *
- (uint32_t)tmict * s->hw.timer_divisor;
- uint32_t lvtt = vlapic_get_reg(s, APIC_LVTT);
-
- s->pt.irq = lvtt & APIC_VECTOR_MASK;
- create_periodic_time(vlapic_vcpu(s), &s->pt, period, s->pt.irq,
- !vlapic_lvtt_period(s), vlapic_pt_cb,
- &s->timer_last_update);
- s->timer_last_update = s->pt.last_plt_gtime;
-
- printk("lapic_load to rearm the actimer:"
- "bus cycle is %uns, "
- "saved tmict count %lu, period %"PRIu64"ns, irq=%"PRIu8"\n",
- APIC_BUS_CYCLE_NS, tmict, period, s->pt.irq);
- }
+ if ( (tmict = vlapic_get_reg(s, APIC_TMICT)) == 0 )
+ return;
- lapic_info(s);
+ period = ((uint64_t)APIC_BUS_CYCLE_NS *
+ (uint32_t)tmict * s->hw.timer_divisor);
+ s->pt.irq = vlapic_get_reg(s, APIC_LVTT) & APIC_VECTOR_MASK;
+ create_periodic_time(vlapic_vcpu(s), &s->pt, period, s->pt.irq,
+ !vlapic_lvtt_period(s), vlapic_pt_cb,
+ &s->timer_last_update);
+ s->timer_last_update = s->pt.last_plt_gtime;
}
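The rearm path above recomputes the timer period as the APIC bus-cycle length times the saved initial count, scaled by the divide configuration, before re-creating the periodic time source. A standalone sketch of that arithmetic; every value below is an assumption for illustration, since the real inputs come from the restored APIC_TMICT and APIC_TDCR state.

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the lapic_rearm() period computation. All numbers here are
     * illustrative, not values read from a real guest. */
    int main(void)
    {
        const uint64_t bus_cycle_ns = 10;   /* stand-in for APIC_BUS_CYCLE_NS */
        uint32_t tmict = 1000000;           /* saved APIC_TMICT count */
        uint32_t divisor = 16;              /* decoded from APIC_TDCR */
        uint64_t period = bus_cycle_ns * (uint64_t)tmict * divisor;
        printf("rearmed timer period = %llu ns\n", (unsigned long long)period);
        return 0;
    }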
static int lapic_save_hidden(struct domain *d, hvm_domain_context_t *h)
{
struct vcpu *v;
struct vlapic *s;
+ int rc = 0;
- for_each_vcpu(d, v)
+ for_each_vcpu ( d, v )
{
s = vcpu_vlapic(v);
- lapic_info(s);
-
- if ( hvm_save_entry(LAPIC, v->vcpu_id, h, &s->hw) != 0 )
- return 1;
+ if ( (rc = hvm_save_entry(LAPIC, v->vcpu_id, h, &s->hw)) != 0 )
+ break;
}
- return 0;
+
+ return rc;
}
static int lapic_save_regs(struct domain *d, hvm_domain_context_t *h)
{
struct vcpu *v;
struct vlapic *s;
+ int rc = 0;
- for_each_vcpu(d, v)
+ for_each_vcpu ( d, v )
{
s = vcpu_vlapic(v);
- if ( hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs) != 0 )
- return 1;
+ if ( (rc = hvm_save_entry(LAPIC_REGS, v->vcpu_id, h, s->regs)) != 0 )
+ break;
}
- return 0;
+
+ return rc;
}
static int lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
@@ -879,8 +979,6 @@ static int lapic_load_hidden(struct domain *d, hvm_domain_context_t *h)
if ( hvm_load_entry(LAPIC, h, &s->hw) != 0 )
return -EINVAL;
- lapic_info(s);
-
vmx_vlapic_msr_changed(v);
return 0;
@@ -916,7 +1014,7 @@ HVM_REGISTER_SAVE_RESTORE(LAPIC_REGS, lapic_save_regs, lapic_load_regs,
int vlapic_init(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
- unsigned int memflags = 0;
+ unsigned int memflags = MEMF_node(vcpu_to_node(v));
HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "%d", v->vcpu_id);
@@ -925,10 +1023,10 @@ int vlapic_init(struct vcpu *v)
#ifdef __i386__
/* 32-bit VMX may be limited to 32-bit physical addresses. */
if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
- memflags = MEMF_bits(32);
+ memflags |= MEMF_bits(32);
#endif
- vlapic->regs_page = alloc_domheap_pages(NULL, 0, memflags);
+ vlapic->regs_page = alloc_domheap_page(NULL, memflags);
if ( vlapic->regs_page == NULL )
{
dprintk(XENLOG_ERR, "alloc vlapic regs error: %d/%d\n",
@@ -941,7 +1039,7 @@ int vlapic_init(struct vcpu *v)
{
dprintk(XENLOG_ERR, "map vlapic regs error: %d/%d\n",
v->domain->domain_id, v->vcpu_id);
- return -ENOMEM;
+ return -ENOMEM;
}
clear_page(vlapic->regs);
@@ -953,6 +1051,8 @@ int vlapic_init(struct vcpu *v)
if ( v->vcpu_id == 0 )
vlapic->hw.apic_base_msr |= MSR_IA32_APICBASE_BSP;
+ tasklet_init(&vlapic->init_tasklet, vlapic_init_action, (unsigned long)v);
+
return 0;
}
@@ -960,6 +1060,7 @@ void vlapic_destroy(struct vcpu *v)
{
struct vlapic *vlapic = vcpu_vlapic(v);
+ tasklet_kill(&vlapic->init_tasklet);
destroy_periodic_time(&vlapic->pt);
unmap_domain_page_global(vlapic->regs);
free_domheap_page(vlapic->regs_page);
diff --git a/xen/arch/x86/hvm/vmx/realmode.c b/xen/arch/x86/hvm/vmx/realmode.c
index c00e8b1e42..5d13f4e60b 100644
--- a/xen/arch/x86/hvm/vmx/realmode.c
+++ b/xen/arch/x86/hvm/vmx/realmode.c
@@ -172,7 +172,7 @@ static void realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt)
hvmemul_ctxt->insn_buf[0], hvmemul_ctxt->insn_buf[1],
hvmemul_ctxt->insn_buf[2], hvmemul_ctxt->insn_buf[3],
hvmemul_ctxt->insn_buf[4], hvmemul_ctxt->insn_buf[5]);
- domain_crash_synchronous();
+ domain_crash(curr->domain);
}
void vmx_realmode(struct cpu_user_regs *regs)
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index bee9eb1deb..48506c5b32 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -38,6 +38,9 @@
#include <asm/shadow.h>
#include <asm/tboot.h>
+static int opt_vpid_enabled = 1;
+boolean_param("vpid", opt_vpid_enabled);
+
/* Dynamic (run-time adjusted) execution control flags. */
u32 vmx_pin_based_exec_control __read_mostly;
u32 vmx_cpu_based_exec_control __read_mostly;
@@ -84,14 +87,16 @@ static void vmx_init_vmcs_config(void)
min = (CPU_BASED_HLT_EXITING |
CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_ACTIVATE_IO_BITMAP |
CPU_BASED_USE_TSC_OFFSETING);
- opt = CPU_BASED_ACTIVATE_MSR_BITMAP;
- opt |= CPU_BASED_TPR_SHADOW;
- opt |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
+ CPU_BASED_TPR_SHADOW |
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
_vmx_cpu_based_exec_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
#ifdef __x86_64__
@@ -107,11 +112,25 @@ static void vmx_init_vmcs_config(void)
{
min = 0;
opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
- SECONDARY_EXEC_WBINVD_EXITING);
+ SECONDARY_EXEC_WBINVD_EXITING |
+ SECONDARY_EXEC_ENABLE_EPT);
+ if ( opt_vpid_enabled )
+ opt |= SECONDARY_EXEC_ENABLE_VPID;
_vmx_secondary_exec_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
}
+ if ( _vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
+ {
+ /* To use EPT we expect to be able to clear certain intercepts. */
+ uint32_t must_be_one, must_be_zero;
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, must_be_one, must_be_zero);
+ if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING) )
+ _vmx_secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ }
+
#if defined(__i386__)
/* If we can't virtualise APIC accesses, the TPR shadow is pointless. */
if ( !(_vmx_secondary_exec_control &
@@ -301,6 +320,10 @@ int vmx_cpu_up(void)
return 0;
}
+ ept_sync_all();
+
+ vpid_sync_all();
+
return 1;
}
@@ -439,6 +462,7 @@ void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
static int construct_vmcs(struct vcpu *v)
{
+ struct domain *d = v->domain;
uint16_t sysenter_cs;
unsigned long sysenter_eip;
@@ -448,10 +472,25 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
__vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
__vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
- __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
+
v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
- if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
- __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
+ v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
+
+ if ( paging_mode_hap(d) )
+ {
+ v->arch.hvm_vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
+ }
+ else
+ {
+ v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ }
+
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+ if ( cpu_has_vmx_secondary_exec_control )
+ __vmwrite(SECONDARY_VM_EXEC_CONTROL,
+ v->arch.hvm_vmx.secondary_exec_control);
/* MSR access bitmap. */
if ( cpu_has_vmx_msr_bitmap )
@@ -570,9 +609,10 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif
- __vmwrite(EXCEPTION_BITMAP, (HVM_TRAP_MASK |
- (1U << TRAP_page_fault) |
- (1U << TRAP_no_device)));
+ __vmwrite(EXCEPTION_BITMAP,
+ HVM_TRAP_MASK
+ | (paging_mode_hap(d) ? 0 : (1U << TRAP_page_fault))
+ | (1U << TRAP_no_device));
v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
hvm_update_guest_cr(v, 0);
@@ -587,6 +627,22 @@ static int construct_vmcs(struct vcpu *v)
__vmwrite(TPR_THRESHOLD, 0);
}
+ if ( paging_mode_hap(d) )
+ {
+ __vmwrite(EPT_POINTER, d->arch.hvm_domain.vmx.ept_control.eptp);
+#ifdef CONFIG_X86_PAE
+ __vmwrite(EPT_POINTER_HIGH,
+ d->arch.hvm_domain.vmx.ept_control.eptp >> 32);
+#endif
+ }
+
+ if ( cpu_has_vmx_vpid )
+ {
+ v->arch.hvm_vmx.vpid =
+ v->domain->arch.hvm_domain.vmx.vpid_base + v->vcpu_id;
+ __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
+ }
+
vmx_vmcs_exit(v);
paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
@@ -729,14 +785,14 @@ void vmx_destroy_vmcs(struct vcpu *v)
arch_vmx->vmcs = NULL;
}
-void vm_launch_fail(unsigned long eflags)
+void vm_launch_fail(void)
{
unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
printk("<vm_launch_fail> error code %lx\n", error);
domain_crash_synchronous();
}
-void vm_resume_fail(unsigned long eflags)
+void vm_resume_fail(void)
{
unsigned long error = __vmread(VM_INSTRUCTION_ERROR);
printk("<vm_resume_fail> error code %lx\n", error);
@@ -780,6 +836,7 @@ void vmx_do_resume(struct vcpu *v)
vmx_load_vmcs(v);
hvm_migrate_timers(v);
vmx_set_host_env(v);
+ vpid_sync_vcpu_all(v);
}
debug_state = v->domain->debugger_attached;
@@ -932,6 +989,10 @@ void vmcs_dump_vcpu(struct vcpu *v)
(uint32_t)vmr(IDT_VECTORING_ERROR_CODE));
printk("TPR Threshold = 0x%02x\n",
(uint32_t)vmr(TPR_THRESHOLD));
+ printk("EPT pointer = 0x%08x%08x\n",
+ (uint32_t)vmr(EPT_POINTER_HIGH), (uint32_t)vmr(EPT_POINTER));
+ printk("Virtual processor ID = 0x%04x\n",
+ (uint32_t)vmr(VIRTUAL_PROCESSOR_ID));
vmx_vmcs_exit(v);
}
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 29dcb68503..628cbddfcf 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -57,6 +57,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
static int vmx_alloc_vlapic_mapping(struct domain *d);
static void vmx_free_vlapic_mapping(struct domain *d);
+static int vmx_alloc_vpid(struct domain *d);
+static void vmx_free_vpid(struct domain *d);
static void vmx_install_vlapic_mapping(struct vcpu *v);
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
static void vmx_update_guest_efer(struct vcpu *v);
@@ -71,12 +73,30 @@ static void vmx_invlpg_intercept(unsigned long vaddr);
static int vmx_domain_initialise(struct domain *d)
{
- return vmx_alloc_vlapic_mapping(d);
+ int rc;
+
+ d->arch.hvm_domain.vmx.ept_control.etmt = EPT_DEFAULT_MT;
+ d->arch.hvm_domain.vmx.ept_control.gaw = EPT_DEFAULT_GAW;
+ d->arch.hvm_domain.vmx.ept_control.asr =
+ pagetable_get_pfn(d->arch.phys_table);
+
+ if ( (rc = vmx_alloc_vpid(d)) != 0 )
+ return rc;
+
+ if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
+ {
+ vmx_free_vpid(d);
+ return rc;
+ }
+
+ return 0;
}
static void vmx_domain_destroy(struct domain *d)
{
+ ept_sync_domain(d);
vmx_free_vlapic_mapping(d);
+ vmx_free_vpid(d);
}
static int vmx_vcpu_initialise(struct vcpu *v)
@@ -492,20 +512,23 @@ static int vmx_restore_cr0_cr3(
unsigned long mfn = 0;
p2m_type_t p2mt;
- if ( cr0 & X86_CR0_PG )
+ if ( paging_mode_shadow(v->domain) )
{
- mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
- if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
+ if ( cr0 & X86_CR0_PG )
{
- gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
- return -EINVAL;
+ mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
+ {
+ gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3);
+ return -EINVAL;
+ }
}
- }
- if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
- put_page(pagetable_get_page(v->arch.guest_table));
+ if ( hvm_paging_enabled(v) )
+ put_page(pagetable_get_page(v->arch.guest_table));
- v->arch.guest_table = pagetable_from_pfn(mfn);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ }
v->arch.hvm_vcpu.guest_cr[0] = cr0 | X86_CR0_ET;
v->arch.hvm_vcpu.guest_cr[3] = cr3;
@@ -538,11 +561,6 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
vmx_update_guest_cr(v, 2);
vmx_update_guest_cr(v, 4);
-#ifdef HVM_DEBUG_SUSPEND
- printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__, c->cr3, c->cr0, c->cr4);
-#endif
-
v->arch.hvm_vcpu.guest_efer = c->msr_efer;
vmx_update_guest_efer(v);
@@ -573,20 +591,6 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
return 0;
}
-#if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND)
-static void dump_msr_state(struct vmx_msr_state *m)
-{
- int i = 0;
- printk("**** msr state ****\n");
- printk("shadow_gs=0x%lx, flags=0x%lx, msr_items:", m->shadow_gs, m->flags);
- for ( i = 0; i < VMX_MSR_COUNT; i++ )
- printk("0x%lx,", m->msrs[i]);
- printk("\n");
-}
-#else
-#define dump_msr_state(m) ((void)0)
-#endif
-
static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
{
#ifdef __x86_64__
@@ -604,8 +608,6 @@ static void vmx_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
#endif
data->tsc = hvm_get_guest_time(v);
-
- dump_msr_state(guest_state);
}
static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
@@ -624,8 +626,6 @@ static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
#endif
hvm_set_guest_time(v, data->tsc);
-
- dump_msr_state(guest_state);
}
@@ -900,6 +900,56 @@ static void vmx_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
__vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
}
+static void vmx_load_pdptrs(struct vcpu *v)
+{
+ unsigned long cr3 = v->arch.hvm_vcpu.guest_cr[3], mfn;
+ uint64_t *guest_pdptrs;
+ p2m_type_t p2mt;
+ char *p;
+
+    /* EPT needs to load the PDPTRs into the VMCS for PAE. */
+ if ( !hvm_pae_enabled(v) || (v->arch.hvm_vcpu.guest_efer & EFER_LMA) )
+ return;
+
+ if ( cr3 & 0x1fUL )
+ goto crash;
+
+ mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt));
+ if ( !p2m_is_ram(p2mt) )
+ goto crash;
+
+ p = map_domain_page(mfn);
+
+ guest_pdptrs = (uint64_t *)(p + (cr3 & ~PAGE_MASK));
+
+ /*
+ * We do not check the PDPTRs for validity. The CPU will do this during
+ * vm entry, and we can handle the failure there and crash the guest.
+ * The only thing we could do better here is #GP instead.
+ */
+
+ vmx_vmcs_enter(v);
+
+ __vmwrite(GUEST_PDPTR0, guest_pdptrs[0]);
+ __vmwrite(GUEST_PDPTR1, guest_pdptrs[1]);
+ __vmwrite(GUEST_PDPTR2, guest_pdptrs[2]);
+ __vmwrite(GUEST_PDPTR3, guest_pdptrs[3]);
+#ifdef CONFIG_X86_PAE
+ __vmwrite(GUEST_PDPTR0_HIGH, guest_pdptrs[0] >> 32);
+ __vmwrite(GUEST_PDPTR1_HIGH, guest_pdptrs[1] >> 32);
+ __vmwrite(GUEST_PDPTR2_HIGH, guest_pdptrs[2] >> 32);
+ __vmwrite(GUEST_PDPTR3_HIGH, guest_pdptrs[3] >> 32);
+#endif
+
+ vmx_vmcs_exit(v);
+
+ unmap_domain_page(p);
+ return;
+
+ crash:
+ domain_crash(v->domain);
+}
+
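vmx_load_pdptrs() above reads the four PAE PDPTEs from the 32-byte-aligned table that guest CR3 points at (hence the cr3 & 0x1f check) and writes them into the VMCS. A rough standalone sketch of locating those entries inside a mapped page; the CR3 value and table contents are fabricated.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Sketch only: where the four PDPTEs live within the page mapped from
     * guest CR3, as in vmx_load_pdptrs(). Values are invented. */
    int main(void)
    {
        uint8_t page[PAGE_SIZE] = { 0 };
        uint64_t fake_table[4] = { 0x1000 | 1, 0x2000 | 1, 0, 0x3000 | 1 };
        uint32_t cr3 = 0x001234a0;          /* low 5 bits clear: 32-byte aligned */

        memcpy(page + (cr3 & (PAGE_SIZE - 1)), fake_table, sizeof(fake_table));

        uint64_t pdptrs[4];
        memcpy(pdptrs, page + (cr3 & (PAGE_SIZE - 1)), sizeof(pdptrs));
        for (int i = 0; i < 4; i++)
            printf("GUEST_PDPTR%d = 0x%016llx\n", i, (unsigned long long)pdptrs[i]);
        return 0;
    }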
static void vmx_update_host_cr3(struct vcpu *v)
{
vmx_vmcs_enter(v);
@@ -915,7 +965,24 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
{
case 0: {
unsigned long hw_cr0_mask =
- X86_CR0_NE | X86_CR0_PG | X86_CR0_WP | X86_CR0_PE;
+ X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
+
+ if ( paging_mode_shadow(v->domain) )
+ hw_cr0_mask |= X86_CR0_WP;
+
+ if ( paging_mode_hap(v->domain) )
+ {
+ /* We manage GUEST_CR3 when guest CR0.PE is zero. */
+ uint32_t cr3_ctls = (CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING);
+ v->arch.hvm_vmx.exec_control &= ~cr3_ctls;
+ if ( !hvm_paging_enabled(v) )
+ v->arch.hvm_vmx.exec_control |= cr3_ctls;
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+
+ /* Changing CR0.PE can change some bits in real CR4. */
+ vmx_update_guest_cr(v, 4);
+ }
if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
@@ -939,11 +1006,27 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
/* CR2 is updated in exit stub. */
break;
case 3:
+ if ( paging_mode_hap(v->domain) )
+ {
+ if ( !hvm_paging_enabled(v) )
+ v->arch.hvm_vcpu.hw_cr[3] =
+ v->domain->arch.hvm_domain.params[HVM_PARAM_IDENT_PT];
+ vmx_load_pdptrs(v);
+ }
+
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
+ vpid_sync_vcpu_all(v);
break;
case 4:
- v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
+ v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
+ if ( paging_mode_hap(v->domain) )
+ v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+ v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
+ if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
+ {
+ v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
+ v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
+ }
__vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
break;
@@ -978,12 +1061,29 @@ static void vmx_update_guest_efer(struct vcpu *v)
static void vmx_flush_guest_tlbs(void)
{
- /* No tagged TLB support on VMX yet. The fact that we're in Xen
- * at all means any guest will have a clean TLB when it's next run,
- * because VMRESUME will flush it for us. */
+ /*
+ * If VPID (i.e. tagged TLB support) is not enabled, the fact that
+ * we're in Xen at all means any guest will have a clean TLB when
+ * it's next run, because VMRESUME will flush it for us.
+ *
+ * If enabled, we invalidate all translations associated with all
+ * VPID values.
+ */
+ vpid_sync_all();
}
+static void __ept_sync_domain(void *info)
+{
+ struct domain *d = info;
+ __invept(1, d->arch.hvm_domain.vmx.ept_control.eptp, 0);
+}
+void ept_sync_domain(struct domain *d)
+{
+ /* Only if using EPT and this domain has some VCPUs to dirty. */
+ if ( d->arch.hvm_domain.hap_enabled && d->vcpu[0] )
+ on_each_cpu(__ept_sync_domain, d, 1, 1);
+}
static void __vmx_inject_exception(
struct vcpu *v, int trap, int type, int error_code)
@@ -1100,6 +1200,9 @@ static struct hvm_function_table vmx_function_table = {
.invlpg_intercept = vmx_invlpg_intercept
};
+static unsigned long *vpid_bitmap;
+#define VPID_BITMAP_SIZE ((1u << VMCS_VPID_WIDTH) / MAX_VIRT_CPUS)
+
void start_vmx(void)
{
static int bootstrapped;
@@ -1133,6 +1236,25 @@ void start_vmx(void)
return;
}
+ if ( cpu_has_vmx_ept )
+ {
+ printk("VMX: EPT is available.\n");
+ vmx_function_table.hap_supported = 1;
+ }
+
+ if ( cpu_has_vmx_vpid )
+ {
+ printk("VMX: VPID is available.\n");
+
+ vpid_bitmap = xmalloc_array(
+ unsigned long, BITS_TO_LONGS(VPID_BITMAP_SIZE));
+ BUG_ON(vpid_bitmap == NULL);
+ memset(vpid_bitmap, 0, BITS_TO_LONGS(VPID_BITMAP_SIZE) * sizeof(long));
+
+ /* VPID 0 is used by VMX root mode (the hypervisor). */
+ __set_bit(0, vpid_bitmap);
+ }
+
setup_vmcs_dump();
hvm_enable(&vmx_function_table);
@@ -1635,18 +1757,47 @@ static int vmx_alloc_vlapic_mapping(struct domain *d)
share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable);
set_mmio_p2m_entry(
d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(virt_to_mfn(apic_va)));
- d->arch.hvm_domain.vmx_apic_access_mfn = virt_to_mfn(apic_va);
+ d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va);
return 0;
}
static void vmx_free_vlapic_mapping(struct domain *d)
{
- unsigned long mfn = d->arch.hvm_domain.vmx_apic_access_mfn;
+ unsigned long mfn = d->arch.hvm_domain.vmx.apic_access_mfn;
if ( mfn != 0 )
free_xenheap_page(mfn_to_virt(mfn));
}
+static int vmx_alloc_vpid(struct domain *d)
+{
+ int idx;
+
+ if ( !cpu_has_vmx_vpid )
+ return 0;
+
+ do {
+ idx = find_first_zero_bit(vpid_bitmap, VPID_BITMAP_SIZE);
+ if ( idx >= VPID_BITMAP_SIZE )
+ {
+ dprintk(XENLOG_WARNING, "VMX VPID space exhausted.\n");
+ return -EBUSY;
+ }
+ }
+ while ( test_and_set_bit(idx, vpid_bitmap) );
+
+ d->arch.hvm_domain.vmx.vpid_base = idx * MAX_VIRT_CPUS;
+ return 0;
+}
+
+static void vmx_free_vpid(struct domain *d)
+{
+ if ( !cpu_has_vmx_vpid )
+ return;
+
+ clear_bit(d->arch.hvm_domain.vmx.vpid_base / MAX_VIRT_CPUS, vpid_bitmap);
+}
+
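vmx_alloc_vpid() above hands each domain a contiguous block of MAX_VIRT_CPUS VPIDs by claiming one slot in a shared bitmap, retrying with an atomic test_and_set_bit if another CPU races for the same slot. The standalone sketch below shows the same allocation pattern with invented sizes and a plain, single-threaded stand-in for the atomic bit operation.

    #include <stdio.h>

    /* Sketch of the VPID allocation pattern: find a free bitmap slot, claim
     * it, and derive the domain's vpid_base from the slot index. NR_SLOTS and
     * MAX_VCPUS are illustrative; the real code uses atomic bit operations. */
    #define NR_SLOTS  64
    #define MAX_VCPUS 128

    static unsigned long bitmap[NR_SLOTS / (8 * sizeof(unsigned long))];

    static int alloc_vpid_base(void)
    {
        for (int idx = 0; idx < NR_SLOTS; idx++) {
            unsigned long *w = &bitmap[idx / (8 * sizeof(unsigned long))];
            unsigned long bit = 1ul << (idx % (8 * sizeof(unsigned long)));
            if (!(*w & bit)) {
                *w |= bit;                    /* test_and_set_bit stand-in */
                return idx * MAX_VCPUS;       /* vpid_base for this domain */
            }
        }
        return -1;                            /* VPID space exhausted */
    }

    int main(void)
    {
        bitmap[0] = 1;  /* slot 0 reserved, like VPID 0 for the hypervisor */
        printf("first domain gets vpid_base %d\n", alloc_vpid_base());
        printf("second domain gets vpid_base %d\n", alloc_vpid_base());
        return 0;
    }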
static void vmx_install_vlapic_mapping(struct vcpu *v)
{
paddr_t virt_page_ma, apic_page_ma;
@@ -1655,7 +1806,7 @@ static void vmx_install_vlapic_mapping(struct vcpu *v)
return;
virt_page_ma = page_to_maddr(vcpu_vlapic(v)->regs_page);
- apic_page_ma = v->domain->arch.hvm_domain.vmx_apic_access_mfn;
+ apic_page_ma = v->domain->arch.hvm_domain.vmx.apic_access_mfn;
apic_page_ma <<= PAGE_SHIFT;
vmx_vmcs_enter(v);
@@ -1900,6 +2051,51 @@ static void vmx_wbinvd_intercept(void)
wbinvd();
}
+static void ept_handle_violation(unsigned long qualification, paddr_t gpa)
+{
+ unsigned long gla_validity = qualification & EPT_GLA_VALIDITY_MASK;
+ struct domain *d = current->domain;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ mfn_t mfn;
+ p2m_type_t t;
+
+ if ( unlikely(qualification & EPT_GAW_VIOLATION) )
+ {
+ gdprintk(XENLOG_ERR, "EPT violation: guest physical address %"PRIpaddr
+ " exceeded its width limit.\n", gpa);
+ goto crash;
+ }
+
+ if ( unlikely(gla_validity == EPT_GLA_VALIDITY_RSVD) ||
+ unlikely(gla_validity == EPT_GLA_VALIDITY_PDPTR_LOAD) )
+ {
+ gdprintk(XENLOG_ERR, "EPT violation: reserved bit or "
+ "pdptr load violation.\n");
+ goto crash;
+ }
+
+ mfn = gfn_to_mfn(d, gfn, &t);
+ if ( p2m_is_ram(t) && paging_mode_log_dirty(d) )
+ {
+ paging_mark_dirty(d, mfn_x(mfn));
+ p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ return;
+ }
+
+ /* This can only happen in log-dirty mode, writing back A/D bits. */
+ if ( unlikely(gla_validity == EPT_GLA_VALIDITY_GPT_WALK) )
+ goto crash;
+
+ ASSERT(gla_validity == EPT_GLA_VALIDITY_MATCH);
+ handle_mmio();
+
+ return;
+
+ crash:
+ domain_crash(d);
+}
+
static void vmx_failed_vmentry(unsigned int exit_reason,
struct cpu_user_regs *regs)
{
@@ -1939,6 +2135,10 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
unsigned long exit_qualification, inst_len = 0;
struct vcpu *v = current;
+ if ( paging_mode_hap(v->domain) && hvm_paging_enabled(v) )
+ v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] =
+ __vmread(GUEST_CR3);
+
exit_reason = __vmread(VM_EXIT_REASON);
hvmtrace_vmexit(v, regs->eip, exit_reason);
@@ -2171,6 +2371,17 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
break;
}
+ case EXIT_REASON_EPT_VIOLATION:
+ {
+ paddr_t gpa = __vmread(GUEST_PHYSICAL_ADDRESS);
+#ifdef CONFIG_X86_PAE
+ gpa |= (paddr_t)__vmread(GUEST_PHYSICAL_ADDRESS_HIGH) << 32;
+#endif
+ exit_qualification = __vmread(EXIT_QUALIFICATION);
+ ept_handle_violation(exit_qualification, gpa);
+ break;
+ }
+
default:
exit_and_crash:
gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
diff --git a/xen/arch/x86/hvm/vmx/x86_32/exits.S b/xen/arch/x86/hvm/vmx/x86_32/exits.S
index 11db8cfc21..eff089a112 100644
--- a/xen/arch/x86/hvm/vmx/x86_32/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S
@@ -129,7 +129,6 @@ ENTRY(vmx_asm_do_vmentry)
/*vmx_resume:*/
HVM_RESTORE_ALL_NOSEGREGS
VMRESUME
- pushf
call vm_resume_fail
ud2
@@ -137,7 +136,6 @@ vmx_launch:
movb $1,VCPU_vmx_launched(%ebx)
HVM_RESTORE_ALL_NOSEGREGS
VMLAUNCH
- pushf
call vm_launch_fail
ud2
diff --git a/xen/arch/x86/hvm/vmx/x86_64/exits.S b/xen/arch/x86/hvm/vmx/x86_64/exits.S
index 48da4869bd..56fdb8ad54 100644
--- a/xen/arch/x86/hvm/vmx/x86_64/exits.S
+++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S
@@ -148,7 +148,6 @@ ENTRY(vmx_asm_do_vmentry)
/*vmx_resume:*/
HVM_RESTORE_ALL_NOSEGREGS
VMRESUME
- pushfq
call vm_resume_fail
ud2
@@ -156,7 +155,6 @@ vmx_launch:
movb $1,VCPU_vmx_launched(%rbx)
HVM_RESTORE_ALL_NOSEGREGS
VMLAUNCH
- pushfq
call vm_launch_fail
ud2
diff --git a/xen/arch/x86/hvm/vpic.c b/xen/arch/x86/hvm/vpic.c
index ce3943eaab..a3d6f2d9ca 100644
--- a/xen/arch/x86/hvm/vpic.c
+++ b/xen/arch/x86/hvm/vpic.c
@@ -319,7 +319,7 @@ static int vpic_intercept_pic_io(
if ( bytes != 1 )
{
gdprintk(XENLOG_WARNING, "PIC_IO bad access size %d\n", bytes);
- return 1;
+ return X86EMUL_OKAY;
}
vpic = &current->domain->arch.hvm_domain.vpic[port >> 7];
@@ -329,7 +329,7 @@ static int vpic_intercept_pic_io(
else
*val = (uint8_t)vpic_ioport_read(vpic, port);
- return 1;
+ return X86EMUL_OKAY;
}
static int vpic_intercept_elcr_io(
@@ -338,11 +338,7 @@ static int vpic_intercept_elcr_io(
struct hvm_hw_vpic *vpic;
uint32_t data;
- if ( bytes != 1 )
- {
- gdprintk(XENLOG_WARNING, "PIC_IO bad access size %d\n", bytes);
- return 1;
- }
+ BUG_ON(bytes != 1);
vpic = &current->domain->arch.hvm_domain.vpic[port & 1];
@@ -360,34 +356,8 @@ static int vpic_intercept_elcr_io(
*val = vpic->elcr & vpic_elcr_mask(vpic);
}
- return 1;
-}
-
-#ifdef HVM_DEBUG_SUSPEND
-static void vpic_info(struct hvm_hw_vpic *s)
-{
- printk("*****pic state:*****\n");
- printk("pic 0x%x.\n", s->irr);
- printk("pic 0x%x.\n", s->imr);
- printk("pic 0x%x.\n", s->isr);
- printk("pic 0x%x.\n", s->irq_base);
- printk("pic 0x%x.\n", s->init_state);
- printk("pic 0x%x.\n", s->priority_add);
- printk("pic 0x%x.\n", s->readsel_isr);
- printk("pic 0x%x.\n", s->poll);
- printk("pic 0x%x.\n", s->auto_eoi);
- printk("pic 0x%x.\n", s->rotate_on_auto_eoi);
- printk("pic 0x%x.\n", s->special_fully_nested_mode);
- printk("pic 0x%x.\n", s->special_mask_mode);
- printk("pic 0x%x.\n", s->elcr);
- printk("pic 0x%x.\n", s->int_output);
- printk("pic 0x%x.\n", s->is_master);
-}
-#else
-static void vpic_info(struct hvm_hw_vpic *s)
-{
+ return X86EMUL_OKAY;
}
-#endif
static int vpic_save(struct domain *d, hvm_domain_context_t *h)
{
@@ -398,7 +368,6 @@ static int vpic_save(struct domain *d, hvm_domain_context_t *h)
for ( i = 0; i < 2 ; i++ )
{
s = &d->arch.hvm_domain.vpic[i];
- vpic_info(s);
if ( hvm_save_entry(PIC, i, h, s) )
return 1;
}
@@ -421,7 +390,6 @@ static int vpic_load(struct domain *d, hvm_domain_context_t *h)
if ( hvm_load_entry(PIC, h, s) != 0 )
return -EINVAL;
- vpic_info(s);
return 0;
}
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index 9ccbefd22a..b7e50ae8f1 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1244,7 +1244,11 @@ static void __init setup_ioapic_ids_from_mpc(void) { }
*/
static int __init timer_irq_works(void)
{
- unsigned long t1 = jiffies;
+ extern unsigned long pit0_ticks;
+ unsigned long t1;
+
+ t1 = pit0_ticks;
+ mb();
local_irq_enable();
/* Let ten ticks pass... */
@@ -1257,7 +1261,8 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ mb();
+ if (pit0_ticks - t1 > 4)
return 1;
return 0;
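The hunk above switches timer_irq_works() from jiffies to a PIT-specific tick counter: snapshot the counter, let some ticks pass, and require that more than four elapsed. A standalone sketch of that check, with the interrupt handler simulated by hand; the real code brackets the counter reads with mb() barriers.

    #include <stdio.h>

    /* Sketch of the timer_irq_works() tick-delta check. The tick source is
     * faked here; in Xen it is incremented by the PIT interrupt handler. */
    static volatile unsigned long pit0_ticks;

    static int timer_irq_works_sketch(void)
    {
        unsigned long t1 = pit0_ticks;
        for (int i = 0; i < 10; i++)   /* stand-in for "let ten ticks pass" */
            pit0_ticks++;
        return (pit0_ticks - t1) > 4;
    }

    int main(void)
    {
        printf("timer works: %d\n", timer_irq_works_sketch());
        return 0;
    }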
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index a1220af3b3..15f2cf57eb 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -299,7 +299,7 @@ int memory_is_conventional_ram(paddr_t p)
unsigned long domain_get_maximum_gpfn(struct domain *d)
{
if ( is_hvm_domain(d) )
- return d->arch.p2m.max_mapped_pfn;
+ return d->arch.p2m->max_mapped_pfn;
/* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
return arch_get_max_pfn(d) - 1;
}
@@ -476,7 +476,7 @@ static void invalidate_shadow_ldt(struct vcpu *v)
if ( pfn == 0 ) continue;
l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
page = mfn_to_page(pfn);
- ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
+ ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
ASSERT_PAGE_IS_DOMAIN(page, v->domain);
put_page_and_type(page);
}
@@ -530,7 +530,7 @@ int map_ldt_shadow_page(unsigned int off)
if ( unlikely(!mfn_valid(mfn)) )
return 0;
- okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
+ okay = get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page);
if ( unlikely(!okay) )
return 0;
@@ -924,7 +924,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
{
/* We expect this is rare so we blow the entire shadow LDT. */
if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
- PGT_ldt_page)) &&
+ PGT_seg_desc_page)) &&
unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
(d == e) )
{
@@ -1748,8 +1748,7 @@ static int alloc_page_type(struct page_info *page, unsigned long type)
return alloc_l3_table(page);
case PGT_l4_page_table:
return alloc_l4_table(page);
- case PGT_gdt_page:
- case PGT_ldt_page:
+ case PGT_seg_desc_page:
return alloc_segdesc_page(page);
default:
printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
@@ -2189,7 +2188,7 @@ int do_mmuext_op(
goto out;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
for ( i = 0; i < count; i++ )
{
@@ -2438,7 +2437,7 @@ int do_mmuext_op(
process_deferred_ops();
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
perfc_add(num_mmuext_ops, i);
@@ -2493,7 +2492,7 @@ int do_mmu_update(
domain_mmap_cache_init(&mapcache);
- LOCK_BIGLOCK(d);
+ domain_lock(d);
for ( i = 0; i < count; i++ )
{
@@ -2665,7 +2664,7 @@ int do_mmu_update(
process_deferred_ops();
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
domain_mmap_cache_destroy(&mapcache);
@@ -2694,7 +2693,7 @@ static int create_grant_pte_mapping(
l1_pgentry_t ol1e;
struct domain *d = v->domain;
- ASSERT(spin_is_locked(&d->big_lock));
+ ASSERT(domain_is_locked(d));
adjust_guest_l1e(nl1e, d);
@@ -2817,7 +2816,7 @@ static int create_grant_va_mapping(
unsigned long gl1mfn;
int okay;
- ASSERT(spin_is_locked(&d->big_lock));
+ ASSERT(domain_is_locked(d));
adjust_guest_l1e(nl1e, d);
@@ -3015,7 +3014,7 @@ int do_update_va_mapping(unsigned long va, u64 val64,
if ( rc )
return rc;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
pl1e = guest_map_l1e(v, va, &gl1mfn);
@@ -3028,7 +3027,7 @@ int do_update_va_mapping(unsigned long va, u64 val64,
process_deferred_ops();
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
@@ -3134,7 +3133,7 @@ long set_gdt(struct vcpu *v,
{
mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
if ( !mfn_valid(mfn) ||
- !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
+ !get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page) )
goto fail;
}
@@ -3173,12 +3172,12 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
if ( copy_from_guest(frames, frame_list, nr_pages) )
return -EFAULT;
- LOCK_BIGLOCK(curr->domain);
+ domain_lock(curr->domain);
if ( (ret = set_gdt(curr, frames, entries)) == 0 )
flush_tlb_local();
- UNLOCK_BIGLOCK(curr->domain);
+ domain_unlock(curr->domain);
return ret;
}
@@ -3211,12 +3210,8 @@ long do_update_descriptor(u64 pa, u64 desc)
/* Check if the given frame is in use in an unsafe context. */
switch ( page->u.inuse.type_info & PGT_type_mask )
{
- case PGT_gdt_page:
- if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
- goto out;
- break;
- case PGT_ldt_page:
- if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
+ case PGT_seg_desc_page:
+ if ( unlikely(!get_page_type(page, PGT_seg_desc_page)) )
goto out;
break;
default:
@@ -3316,7 +3311,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
return -EINVAL;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
/* Remove previously mapped page if it was present. */
prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
@@ -3338,7 +3333,7 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
/* Map at new location. */
guest_physmap_add_page(d, xatp.gpfn, mfn);
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
rcu_unlock_domain(d);
@@ -3674,7 +3669,7 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
struct ptwr_emulate_ctxt ptwr_ctxt;
int rc;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
/* Attempt to read the PTE that maps the VA being accessed. */
guest_get_eff_l1e(v, addr, &pte);
@@ -3699,12 +3694,12 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
if ( rc == X86EMUL_UNHANDLEABLE )
goto bail;
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
perfc_incr(ptwr_emulations);
return EXCRET_fault_fixed;
bail:
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
return 0;
}
diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
index 160e5f36bf..64cb72786e 100644
--- a/xen/arch/x86/mm/hap/Makefile
+++ b/xen/arch/x86/mm/hap/Makefile
@@ -2,6 +2,7 @@ obj-y += hap.o
obj-y += guest_walk_2level.o
obj-y += guest_walk_3level.o
obj-y += guest_walk_4level.o
+obj-y += p2m-ept.o
guest_levels = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 15cdc23c96..e30acf6948 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -38,6 +38,7 @@
#include <asm/hap.h>
#include <asm/paging.h>
#include <asm/domain.h>
+#include <xen/numa.h>
#include "private.h"
@@ -61,7 +62,7 @@ int hap_enable_log_dirty(struct domain *d)
hap_unlock(d);
/* set l1e entries of P2M table to be read-only. */
- p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
+ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
return 0;
}
@@ -73,14 +74,14 @@ int hap_disable_log_dirty(struct domain *d)
hap_unlock(d);
/* set l1e entries of P2M table back to normal mode */
- p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
+ p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
return 0;
}
void hap_clean_dirty_bitmap(struct domain *d)
{
/* set l1e entries of P2M table to be read-only. */
- p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
+ p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
flush_tlb_mask(d->domain_dirty_cpumask);
}
@@ -135,7 +136,8 @@ static struct page_info *hap_alloc_p2m_page(struct domain *d)
&& mfn_x(page_to_mfn(pg)) >= (1UL << (32 - PAGE_SHIFT)) )
{
free_domheap_page(pg);
- pg = alloc_domheap_pages(NULL, 0, MEMF_bits(32));
+ pg = alloc_domheap_page(
+ NULL, MEMF_bits(32) | MEMF_node(domain_to_node(d)));
if ( likely(pg != NULL) )
{
void *p = hap_map_domain_page(page_to_mfn(pg));
@@ -199,7 +201,7 @@ hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
if ( d->arch.paging.hap.total_pages < pages )
{
/* Need to allocate more memory from domheap */
- pg = alloc_domheap_page(NULL);
+ pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
if ( pg == NULL )
{
HAP_PRINTK("failed to allocate hap pages.\n");
diff --git a/xen/arch/x86/mm/hap/p2m-ept.c b/xen/arch/x86/mm/hap/p2m-ept.c
new file mode 100644
index 0000000000..697ca4d697
--- /dev/null
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -0,0 +1,257 @@
+/*
+ * p2m-ept.c: use the EPT page table as the p2m
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/types.h>
+#include <asm/domain.h>
+#include <asm/p2m.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <xen/iommu.h>
+
+static void ept_p2m_type_to_flags(ept_entry_t *entry, p2m_type_t type)
+{
+    switch ( type )
+ {
+ case p2m_invalid:
+ case p2m_mmio_dm:
+ default:
+ return;
+ case p2m_ram_rw:
+ case p2m_mmio_direct:
+ entry->r = entry->w = entry->x = 1;
+ return;
+ case p2m_ram_logdirty:
+ case p2m_ram_ro:
+ entry->r = entry->x = 1;
+ entry->w = 0;
+ return;
+ }
+}
+
+static int ept_next_level(struct domain *d, bool_t read_only,
+ ept_entry_t **table, unsigned long *gfn_remainder,
+ u32 shift)
+{
+ ept_entry_t *ept_entry, *next;
+ u32 index;
+
+ index = *gfn_remainder >> shift;
+ *gfn_remainder &= (1UL << shift) - 1;
+
+ ept_entry = (*table) + index;
+
+ if ( !(ept_entry->epte & 0x7) )
+ {
+ struct page_info *pg;
+
+ if ( read_only )
+ return 0;
+
+ pg = d->arch.p2m->alloc_page(d);
+ if ( pg == NULL )
+ return 0;
+
+ pg->count_info = 1;
+ pg->u.inuse.type_info = 1 | PGT_validated;
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
+
+ ept_entry->emt = 0;
+ ept_entry->sp_avail = 0;
+ ept_entry->avail1 = 0;
+ ept_entry->mfn = page_to_mfn(pg);
+ ept_entry->rsvd = 0;
+ ept_entry->avail2 = 0;
+ /* last step */
+ ept_entry->r = ept_entry->w = ept_entry->x = 1;
+ }
+
+ next = map_domain_page(ept_entry->mfn);
+ unmap_domain_page(*table);
+ *table = next;
+
+ return 1;
+}
+
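At each level of the walk, ept_next_level() above peels off EPT_TABLE_ORDER bits of the remaining gfn as the table index, allocating a missing intermediate table unless the walk is read-only. With GAW 3 this means three such steps (levels 3..1) plus a final leaf index taken from the remainder. The standalone sketch below shows only the gfn decomposition; the gfn is an arbitrary example.

    #include <stdio.h>

    /* Sketch of the gfn decomposition performed by ept_set_entry() and
     * ept_get_entry(): 9 index bits per level for 512-entry tables. */
    #define EPT_TABLE_ORDER 9
    #define EPT_DEFAULT_GAW 3   /* three ept_next_level() steps plus a leaf */

    int main(void)
    {
        unsigned long gfn = 0x12345678ul;   /* example gfn */
        unsigned long remainder = gfn;

        for (int i = EPT_DEFAULT_GAW; i > 0; i--) {
            unsigned int shift = i * EPT_TABLE_ORDER;
            unsigned int index = remainder >> shift;
            remainder &= (1ul << shift) - 1;
            printf("level %d index = %u\n", i, index);
        }
        printf("leaf (level 0) index = %lu\n", remainder);
        return 0;
    }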
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+{
+ ept_entry_t *table =
+ map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+ unsigned long gfn_remainder = gfn;
+ ept_entry_t *ept_entry = NULL;
+ u32 index;
+ int i, rv = 0;
+
+ /* Should check if gfn obeys GAW here */
+
+ for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+ if ( !ept_next_level(d, 0, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER) )
+ goto out;
+
+ index = gfn_remainder;
+ ept_entry = table + index;
+
+ if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
+ {
+ /* Track the highest gfn for which we have ever had a valid mapping */
+ if ( gfn > d->arch.p2m->max_mapped_pfn )
+ d->arch.p2m->max_mapped_pfn = gfn;
+
+ ept_entry->emt = EPT_DEFAULT_MT;
+ ept_entry->sp_avail = 0;
+ ept_entry->avail1 = p2mt;
+ ept_entry->mfn = mfn_x(mfn);
+ ept_entry->rsvd = 0;
+ ept_entry->avail2 = 0;
+ /* last step */
+ ept_entry->r = ept_entry->w = ept_entry->x = 1;
+ ept_p2m_type_to_flags(ept_entry, p2mt);
+ }
+ else
+ ept_entry->epte = 0;
+
+ /* Success */
+ rv = 1;
+
+ out:
+ unmap_domain_page(table);
+
+ ept_sync_domain(d);
+
+ /* If p2m table is shared with vtd page-table. */
+ if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
+ iommu_flush(d, gfn, (u64*)ept_entry);
+
+ return rv;
+}
+
+/* Read ept p2m entries */
+static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+ ept_entry_t *table =
+ map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+ unsigned long gfn_remainder = gfn;
+ ept_entry_t *ept_entry;
+ u32 index;
+ int i;
+ mfn_t mfn = _mfn(INVALID_MFN);
+
+ *t = p2m_mmio_dm;
+
+ /* This pfn is higher than the highest the p2m map currently holds */
+ if ( gfn > d->arch.p2m->max_mapped_pfn )
+ goto out;
+
+ /* Should check if gfn obeys GAW here. */
+
+ for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
+ if ( !ept_next_level(d, 1, &table, &gfn_remainder,
+ i * EPT_TABLE_ORDER) )
+ goto out;
+
+ index = gfn_remainder;
+ ept_entry = table + index;
+
+ if ( ept_entry->avail1 != p2m_invalid )
+ {
+ *t = ept_entry->avail1;
+ mfn = _mfn(ept_entry->mfn);
+ }
+
+ out:
+ unmap_domain_page(table);
+ return mfn;
+}
+
+static mfn_t ept_get_entry_current(unsigned long gfn, p2m_type_t *t)
+{
+ return ept_get_entry(current->domain, gfn, t);
+}
+
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type. This is used in hardware-assisted paging to
+ * quickly enable or disable log-dirty tracking. */
+
+static void ept_change_entry_type_global(struct domain *d,
+ p2m_type_t ot, p2m_type_t nt)
+{
+ ept_entry_t *l4e, *l3e, *l2e, *l1e;
+ int i4, i3, i2, i1;
+
+ if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
+ return;
+
+ BUG_ON(EPT_DEFAULT_GAW != 3);
+
+ l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+    for ( i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
+ {
+ if ( !l4e[i4].epte || l4e[i4].sp_avail )
+ continue;
+ l3e = map_domain_page(l4e[i4].mfn);
+ for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
+ {
+ if ( !l3e[i3].epte || l3e[i3].sp_avail )
+ continue;
+ l2e = map_domain_page(l3e[i3].mfn);
+ for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
+ {
+ if ( !l2e[i2].epte || l2e[i2].sp_avail )
+ continue;
+ l1e = map_domain_page(l2e[i2].mfn);
+ for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
+ {
+ if ( !l1e[i1].epte )
+ continue;
+ if ( l1e[i1].avail1 != ot )
+ continue;
+ l1e[i1].avail1 = nt;
+ ept_p2m_type_to_flags(l1e+i1, nt);
+ }
+ unmap_domain_page(l1e);
+ }
+ unmap_domain_page(l2e);
+ }
+ unmap_domain_page(l3e);
+ }
+ unmap_domain_page(l4e);
+
+ ept_sync_domain(d);
+}
+
+void ept_p2m_init(struct domain *d)
+{
+ d->arch.p2m->set_entry = ept_set_entry;
+ d->arch.p2m->get_entry = ept_get_entry;
+ d->arch.p2m->get_entry_current = ept_get_entry_current;
+ d->arch.p2m->change_entry_type_global = ept_change_entry_type_global;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index e8298fb3bd..faee13955e 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -27,6 +27,7 @@
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
+#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <xen/iommu.h>
/* Debugging and auditing of the P2M code? */
@@ -41,36 +42,37 @@
* Locking discipline: always acquire this lock before the shadow or HAP one
*/
-#define p2m_lock_init(_d) \
- do { \
- spin_lock_init(&(_d)->arch.p2m.lock); \
- (_d)->arch.p2m.locker = -1; \
- (_d)->arch.p2m.locker_function = "nobody"; \
+#define p2m_lock_init(_p2m) \
+ do { \
+ spin_lock_init(&(_p2m)->lock); \
+ (_p2m)->locker = -1; \
+ (_p2m)->locker_function = "nobody"; \
} while (0)
-#define p2m_lock(_d) \
- do { \
- if ( unlikely((_d)->arch.p2m.locker == current->processor) )\
- { \
- printk("Error: p2m lock held by %s\n", \
- (_d)->arch.p2m.locker_function); \
- BUG(); \
- } \
- spin_lock(&(_d)->arch.p2m.lock); \
- ASSERT((_d)->arch.p2m.locker == -1); \
- (_d)->arch.p2m.locker = current->processor; \
- (_d)->arch.p2m.locker_function = __func__; \
+#define p2m_lock(_p2m) \
+ do { \
+ if ( unlikely((_p2m)->locker == current->processor) ) \
+ { \
+ printk("Error: p2m lock held by %s\n", \
+ (_p2m)->locker_function); \
+ BUG(); \
+ } \
+ spin_lock(&(_p2m)->lock); \
+ ASSERT((_p2m)->locker == -1); \
+ (_p2m)->locker = current->processor; \
+ (_p2m)->locker_function = __func__; \
} while (0)
-#define p2m_unlock(_d) \
- do { \
- ASSERT((_d)->arch.p2m.locker == current->processor); \
- (_d)->arch.p2m.locker = -1; \
- (_d)->arch.p2m.locker_function = "nobody"; \
- spin_unlock(&(_d)->arch.p2m.lock); \
+#define p2m_unlock(_p2m) \
+ do { \
+ ASSERT((_p2m)->locker == current->processor); \
+ (_p2m)->locker = -1; \
+ (_p2m)->locker_function = "nobody"; \
+ spin_unlock(&(_p2m)->lock); \
} while (0)
-
+#define p2m_locked_by_me(_p2m) \
+ (current->processor == (_p2m)->locker)
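The reworked lock macros above track which CPU holds the p2m lock and in which function, so recursive locking BUGs out immediately and p2m_locked_by_me() can back the ASSERT added later in p2m_change_type_global(). A minimal sketch of that locker-tracking pattern, with the spinlock itself elided and a fake CPU id standing in for current->processor:

    #include <assert.h>
    #include <stdio.h>

    /* Sketch of the locker-tracking pattern in the p2m_lock macros. */
    struct fake_p2m {
        int locker;                 /* -1 when unheld */
        const char *locker_function;
    };

    static void p2m_lock_sketch(struct fake_p2m *p, int cpu, const char *fn)
    {
        assert(p->locker != cpu);   /* would BUG() on recursive locking */
        /* spin_lock(&p->lock) would go here */
        p->locker = cpu;
        p->locker_function = fn;
    }

    static void p2m_unlock_sketch(struct fake_p2m *p, int cpu)
    {
        assert(p->locker == cpu);
        p->locker = -1;
        p->locker_function = "nobody";
        /* spin_unlock(&p->lock) would go here */
    }

    int main(void)
    {
        struct fake_p2m p2m = { -1, "nobody" };
        p2m_lock_sketch(&p2m, 0, __func__);
        printf("locked by CPU %d in %s\n", p2m.locker, p2m.locker_function);
        p2m_unlock_sketch(&p2m, 0);
        return 0;
    }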
/* Printouts */
#define P2M_PRINTK(_f, _a...) \
@@ -152,7 +154,7 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
l1_pgentry_t *p2m_entry;
l1_pgentry_t new_entry;
void *next;
- ASSERT(d->arch.p2m.alloc_page);
+ ASSERT(d->arch.p2m->alloc_page);
if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
shift, max)) )
@@ -160,10 +162,10 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
{
- struct page_info *pg = d->arch.p2m.alloc_page(d);
+ struct page_info *pg = d->arch.p2m->alloc_page(d);
if ( pg == NULL )
return 0;
- list_add_tail(&pg->list, &d->arch.p2m.pages);
+ list_add_tail(&pg->list, &d->arch.p2m->pages);
pg->u.inuse.type_info = type | 1 | PGT_validated;
pg->count_info = 1;
@@ -202,7 +204,7 @@ p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table,
// Returns 0 on error (out of memory)
static int
-set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
@@ -244,8 +246,8 @@ set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
ASSERT(p2m_entry);
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) )
- d->arch.p2m.max_mapped_pfn = gfn;
+ if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
+ d->arch.p2m->max_mapped_pfn = gfn;
if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
@@ -279,14 +281,170 @@ set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
return rv;
}
+static mfn_t
+p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+ mfn_t mfn;
+ paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
+ l2_pgentry_t *l2e;
+ l1_pgentry_t *l1e;
+
+ ASSERT(paging_mode_translate(d));
+
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+ *t = p2m_mmio_dm;
+
+ mfn = pagetable_get_mfn(d->arch.phys_table);
+
+ if ( gfn > d->arch.p2m->max_mapped_pfn )
+ /* This pfn is higher than the highest the p2m map currently holds */
+ return _mfn(INVALID_MFN);
+
+#if CONFIG_PAGING_LEVELS >= 4
+ {
+ l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
+ l4e += l4_table_offset(addr);
+ if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l4e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l4e_get_pfn(*l4e));
+ unmap_domain_page(l4e);
+ }
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
+ {
+ l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
+#if CONFIG_PAGING_LEVELS == 3
+ /* On PAE hosts the p2m has eight l3 entries, not four (see
+ * shadow_set_p2m_entry()) so we can't use l3_table_offset.
+ * Instead, just count the number of l3es from zero. It's safe
+ * to do this because we already checked that the gfn is within
+ * the bounds of the p2m. */
+ l3e += (addr >> L3_PAGETABLE_SHIFT);
+#else
+ l3e += l3_table_offset(addr);
+#endif
+ if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l3e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l3e_get_pfn(*l3e));
+ unmap_domain_page(l3e);
+ }
+#endif
+
+ l2e = map_domain_page(mfn_x(mfn));
+ l2e += l2_table_offset(addr);
+ if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l2e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l2e_get_pfn(*l2e));
+ unmap_domain_page(l2e);
+
+ l1e = map_domain_page(mfn_x(mfn));
+ l1e += l1_table_offset(addr);
+ if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
+ {
+ unmap_domain_page(l1e);
+ return _mfn(INVALID_MFN);
+ }
+ mfn = _mfn(l1e_get_pfn(*l1e));
+ *t = p2m_flags_to_type(l1e_get_flags(*l1e));
+ unmap_domain_page(l1e);
+
+ ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+ return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+ mfn_t mfn = _mfn(INVALID_MFN);
+ p2m_type_t p2mt = p2m_mmio_dm;
+ /* XXX This is for compatibility with the old model, where anything not
+ * XXX marked as RAM was considered to be emulated MMIO space.
+ * XXX Once we start explicitly registering MMIO regions in the p2m
+ * XXX we will return p2m_invalid for unmapped gfns */
+
+ if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
+ {
+ l1_pgentry_t l1e = l1e_empty();
+ int ret;
+
+ ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
+ / sizeof(l1_pgentry_t));
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ ret = __copy_from_user(&l1e,
+ &phys_to_machine_mapping[gfn],
+ sizeof(l1e));
+
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
+ }
+ }
+
+ *t = p2mt;
+ return mfn;
+}
/* Init the data structures for later use by the p2m code. */
-void p2m_init(struct domain *d)
+int p2m_init(struct domain *d)
{
- p2m_lock_init(d);
- INIT_LIST_HEAD(&d->arch.p2m.pages);
+ struct p2m_domain *p2m;
+
+ p2m = xmalloc(struct p2m_domain);
+ if ( p2m == NULL )
+ return -ENOMEM;
+
+ d->arch.p2m = p2m;
+
+ memset(p2m, 0, sizeof(*p2m));
+ p2m_lock_init(p2m);
+ INIT_LIST_HEAD(&p2m->pages);
+
+ p2m->set_entry = p2m_set_entry;
+ p2m->get_entry = p2m_gfn_to_mfn;
+ p2m->get_entry_current = p2m_gfn_to_mfn_current;
+ p2m->change_entry_type_global = p2m_change_type_global;
+
+ if ( is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) )
+ ept_p2m_init(d);
+
+ return 0;
}
+void p2m_change_entry_type_global(struct domain *d,
+ p2m_type_t ot, p2m_type_t nt)
+{
+ struct p2m_domain *p2m = d->arch.p2m;
+
+ p2m_lock(p2m);
+ p2m->change_entry_type_global(d, ot, nt);
+ p2m_unlock(p2m);
+}
+
+static inline
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
+{
+ return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
+}
// Allocate a new p2m table for a domain.
//
@@ -308,28 +466,29 @@ int p2m_alloc_table(struct domain *d,
struct page_info *page, *p2m_top;
unsigned int page_count = 0;
unsigned long gfn = -1UL;
+ struct p2m_domain *p2m = d->arch.p2m;
- p2m_lock(d);
+ p2m_lock(p2m);
if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
{
P2M_ERROR("p2m already allocated for this domain\n");
- p2m_unlock(d);
+ p2m_unlock(p2m);
return -EINVAL;
}
P2M_PRINTK("allocating p2m table\n");
- d->arch.p2m.alloc_page = alloc_page;
- d->arch.p2m.free_page = free_page;
+ p2m->alloc_page = alloc_page;
+ p2m->free_page = free_page;
- p2m_top = d->arch.p2m.alloc_page(d);
+ p2m_top = p2m->alloc_page(d);
if ( p2m_top == NULL )
{
- p2m_unlock(d);
+ p2m_unlock(p2m);
return -ENOMEM;
}
- list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
+ list_add_tail(&p2m_top->list, &p2m->pages);
p2m_top->count_info = 1;
p2m_top->u.inuse.type_info =
@@ -376,13 +535,13 @@ int p2m_alloc_table(struct domain *d,
#endif
P2M_PRINTK("p2m table initialised (%u pages)\n", page_count);
- p2m_unlock(d);
+ p2m_unlock(p2m);
return 0;
error:
P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%"
PRI_mfn "\n", gfn, mfn_x(mfn));
- p2m_unlock(d);
+ p2m_unlock(p2m);
return -ENOMEM;
}
@@ -392,101 +551,24 @@ void p2m_teardown(struct domain *d)
{
struct list_head *entry, *n;
struct page_info *pg;
+ struct p2m_domain *p2m = d->arch.p2m;
- p2m_lock(d);
+ p2m_lock(p2m);
d->arch.phys_table = pagetable_null();
- list_for_each_safe(entry, n, &d->arch.p2m.pages)
+ list_for_each_safe(entry, n, &p2m->pages)
{
pg = list_entry(entry, struct page_info, list);
list_del(entry);
- d->arch.p2m.free_page(d, pg);
+ p2m->free_page(d, pg);
}
- p2m_unlock(d);
+ p2m_unlock(p2m);
}
-mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
-/* Read another domain's p2m entries */
+void p2m_final_teardown(struct domain *d)
{
- mfn_t mfn;
- paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
- l2_pgentry_t *l2e;
- l1_pgentry_t *l1e;
-
- ASSERT(paging_mode_translate(d));
-
- /* XXX This is for compatibility with the old model, where anything not
- * XXX marked as RAM was considered to be emulated MMIO space.
- * XXX Once we start explicitly registering MMIO regions in the p2m
- * XXX we will return p2m_invalid for unmapped gfns */
- *t = p2m_mmio_dm;
-
- mfn = pagetable_get_mfn(d->arch.phys_table);
-
- if ( gfn > d->arch.p2m.max_mapped_pfn )
- /* This pfn is higher than the highest the p2m map currently holds */
- return _mfn(INVALID_MFN);
-
-#if CONFIG_PAGING_LEVELS >= 4
- {
- l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn));
- l4e += l4_table_offset(addr);
- if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l4e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l4e_get_pfn(*l4e));
- unmap_domain_page(l4e);
- }
-#endif
-#if CONFIG_PAGING_LEVELS >= 3
- {
- l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn));
-#if CONFIG_PAGING_LEVELS == 3
- /* On PAE hosts the p2m has eight l3 entries, not four (see
- * shadow_set_p2m_entry()) so we can't use l3_table_offset.
- * Instead, just count the number of l3es from zero. It's safe
- * to do this because we already checked that the gfn is within
- * the bounds of the p2m. */
- l3e += (addr >> L3_PAGETABLE_SHIFT);
-#else
- l3e += l3_table_offset(addr);
-#endif
- if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l3e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l3e_get_pfn(*l3e));
- unmap_domain_page(l3e);
- }
-#endif
-
- l2e = map_domain_page(mfn_x(mfn));
- l2e += l2_table_offset(addr);
- if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l2e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l2e_get_pfn(*l2e));
- unmap_domain_page(l2e);
-
- l1e = map_domain_page(mfn_x(mfn));
- l1e += l1_table_offset(addr);
- if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
- {
- unmap_domain_page(l1e);
- return _mfn(INVALID_MFN);
- }
- mfn = _mfn(l1e_get_pfn(*l1e));
- *t = p2m_flags_to_type(l1e_get_flags(*l1e));
- unmap_domain_page(l1e);
-
- ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
- return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+ xfree(d->arch.p2m);
+ d->arch.p2m = NULL;
}
#if P2M_AUDIT
@@ -564,7 +646,7 @@ static void audit_p2m(struct domain *d)
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
- if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) )
+ if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
{
lp2mfn = mfn_x(gfn_to_mfn_current(gfn, &type));
if ( lp2mfn != mfn_x(p2mfn) )
@@ -695,11 +777,11 @@ void
guest_physmap_remove_page(struct domain *d, unsigned long gfn,
unsigned long mfn)
{
- p2m_lock(d);
+ p2m_lock(d->arch.p2m);
audit_p2m(d);
p2m_remove_page(d, gfn, mfn);
audit_p2m(d);
- p2m_unlock(d);
+ p2m_unlock(d->arch.p2m);
}
int
@@ -722,7 +804,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
*/
if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
{
- if ( !test_and_set_bool(d->arch.hvm_domain.amd_npt_4gb_warning) )
+ if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
" 4GB: specify 'hap=0' domain config option.\n",
d->domain_id);
@@ -730,7 +812,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
}
#endif
- p2m_lock(d);
+ p2m_lock(d->arch.p2m);
audit_p2m(d);
P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
@@ -781,7 +863,7 @@ guest_physmap_add_entry(struct domain *d, unsigned long gfn,
}
audit_p2m(d);
- p2m_unlock(d);
+ p2m_unlock(d->arch.p2m);
return rc;
}
@@ -812,7 +894,7 @@ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
return;
- p2m_lock(d);
+ ASSERT(p2m_locked_by_me(d->arch.p2m));
#if CONFIG_PAGING_LEVELS == 4
l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
@@ -860,7 +942,7 @@ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
mfn = l1e_get_pfn(l1e[i1]);
gfn = get_gpfn_from_mfn(mfn);
/* create a new 1le entry with the new type */
- flags = p2m_flags_to_type(nt);
+ flags = p2m_type_to_flags(nt);
l1e_content = l1e_from_pfn(mfn, flags);
paging_write_p2m_entry(d, gfn, &l1e[i1],
l1mfn, l1e_content, 1);
@@ -884,7 +966,6 @@ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
unmap_domain_page(l2e);
#endif
- p2m_unlock(d);
}
/* Modify the p2m type of a single gfn from ot to nt, returning the
@@ -895,13 +976,13 @@ p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
p2m_type_t pt;
mfn_t mfn;
- p2m_lock(d);
+ p2m_lock(d->arch.p2m);
mfn = gfn_to_mfn(d, gfn, &pt);
if ( pt == ot )
set_p2m_entry(d, gfn, mfn, nt);
- p2m_unlock(d);
+ p2m_unlock(d->arch.p2m);
return pt;
}
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index e6c3cbb9e6..2247d8dd68 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -26,6 +26,7 @@
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/guest_access.h>
+#include <xen/numa.h>
#include <xsm/xsm.h>
#define hap_enabled(d) (is_hvm_domain(d) && (d)->arch.hvm_domain.hap_enabled)
@@ -99,8 +100,9 @@
static mfn_t paging_new_log_dirty_page(struct domain *d, void **mapping_p)
{
mfn_t mfn;
- struct page_info *page = alloc_domheap_page(NULL);
+ struct page_info *page;
+ page = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
if ( unlikely(page == NULL) )
{
d->arch.paging.log_dirty.failed_allocs++;
@@ -482,9 +484,12 @@ void paging_log_dirty_teardown(struct domain*d)
/* CODE FOR PAGING SUPPORT */
/************************************************/
/* Domain paging struct initialization. */
-void paging_domain_init(struct domain *d)
+int paging_domain_init(struct domain *d)
{
- p2m_init(d);
+ int rc;
+
+ if ( (rc = p2m_init(d)) != 0 )
+ return rc;
/* The order of the *_init calls below is important, as the later
* ones may rewrite some common fields. Shadow pagetables are the
@@ -494,6 +499,8 @@ void paging_domain_init(struct domain *d)
/* ... but we will use hardware assistance if it's available. */
if ( hap_enabled(d) )
hap_domain_init(d);
+
+ return 0;
}
/* vcpu paging struct initialization goes here */
@@ -587,6 +594,8 @@ void paging_final_teardown(struct domain *d)
hap_final_teardown(d);
else
shadow_final_teardown(d);
+
+ p2m_final_teardown(d);
}
/* Enable an arbitrary paging-assistance mode. Call once at domain
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index e4a04bb456..d7239cde77 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -36,6 +36,7 @@
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shadow.h>
+#include <xen/numa.h>
#include "private.h"
@@ -1249,7 +1250,7 @@ static unsigned int sh_set_allocation(struct domain *d,
{
/* Need to allocate more memory from domheap */
sp = (struct shadow_page_info *)
- alloc_domheap_pages(NULL, order, 0);
+ alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
if ( sp == NULL )
{
SHADOW_PRINTK("failed to allocate shadow pages.\n");
@@ -2171,13 +2172,12 @@ void sh_remove_shadows(struct vcpu *v, mfn_t gmfn, int fast, int all)
#undef DO_UNSHADOW
/* If that didn't catch the shadows, something is wrong */
- if ( !fast && (pg->count_info & PGC_page_table) )
+ if ( !fast && all && (pg->count_info & PGC_page_table) )
{
SHADOW_ERROR("can't find all shadows of mfn %05lx "
"(shadow_flags=%08lx)\n",
mfn_x(gmfn), pg->shadow_flags);
- if ( all )
- domain_crash(v->domain);
+ domain_crash(v->domain);
}
/* Need to flush TLBs now, so that linear maps are safe next time we
diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
new file mode 100644
index 0000000000..341457b4bc
--- /dev/null
+++ b/xen/arch/x86/pci.c
@@ -0,0 +1,118 @@
+/******************************************************************************
+ * pci.c
+ *
+ * PCI access functions.
+ */
+
+#include <xen/config.h>
+#include <xen/pci.h>
+#include <xen/spinlock.h>
+#include <asm/io.h>
+
+#define PCI_CONF_ADDRESS(bus, dev, func, reg) \
+ (0x80000000 | (bus << 16) | (dev << 11) | (func << 8) | (reg & ~3))
+
+static DEFINE_SPINLOCK(pci_config_lock);
+
+uint32_t pci_conf_read(uint32_t cf8, uint8_t offset, uint8_t bytes)
+{
+ unsigned long flags;
+ uint32_t value;
+
+ BUG_ON((offset + bytes) > 4);
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(cf8, 0xcf8);
+
+ switch ( bytes )
+ {
+ case 1:
+ value = inb(0xcfc + offset);
+ break;
+ case 2:
+ value = inw(0xcfc + offset);
+ break;
+ case 4:
+ value = inl(0xcfc + offset);
+ break;
+ default:
+ value = 0;
+ BUG();
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ return value;
+}
+
+void pci_conf_write(uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data)
+{
+ unsigned long flags;
+
+ BUG_ON((offset + bytes) > 4);
+
+ spin_lock_irqsave(&pci_config_lock, flags);
+
+ outl(cf8, 0xcf8);
+
+ switch ( bytes )
+ {
+ case 1:
+ outb((uint8_t)data, 0xcfc + offset);
+ break;
+ case 2:
+ outw((uint16_t)data, 0xcfc + offset);
+ break;
+ case 4:
+ outl(data, 0xcfc + offset);
+ break;
+ }
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+}
+
+uint8_t pci_conf_read8(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ return pci_conf_read(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 3, 1);
+}
+
+uint16_t pci_conf_read16(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ return pci_conf_read(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 2, 2);
+}
+
+uint32_t pci_conf_read32(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ return pci_conf_read(PCI_CONF_ADDRESS(bus, dev, func, reg), 0, 4);
+}
+
+void pci_conf_write8(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint8_t data)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 3, 1, data);
+}
+
+void pci_conf_write16(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint16_t data)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), reg & 2, 2, data);
+}
+
+void pci_conf_write32(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint32_t data)
+{
+ BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
+ pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), 0, 4, data);
+}
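Note: the new pci.c uses the standard CF8/CFC configuration mechanism: a dword written to port 0xcf8 selects bus/device/function/register, and the data is then transferred through 0xcfc plus the byte offset within the selected dword. The self-contained snippet below (plain user-space C, hypothetical device address) only illustrates the address arithmetic of PCI_CONF_ADDRESS() and the offset handling of the read16 wrapper; it does not touch hardware.

#include <stdint.h>
#include <stdio.h>

/* Same encoding as PCI_CONF_ADDRESS() above: enable bit, bus, dev, func,
 * and the dword-aligned register offset. */
static uint32_t conf_address(unsigned int bus, unsigned int dev,
                             unsigned int func, unsigned int reg)
{
    return 0x80000000u | (bus << 16) | (dev << 11) | (func << 8) | (reg & ~3u);
}

int main(void)
{
    unsigned int reg = 0x02;                      /* device ID, 16-bit field */
    uint32_t cf8 = conf_address(0, 0x1f, 3, reg); /* hypothetical dev 00:1f.3 */

    printf("write 0x%08x to 0xcf8, read 16 bits at 0xcfc + %u\n",
           cf8, reg & 2);
    return 0;
}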
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 383a868225..9b025b51b1 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -861,6 +861,8 @@ void __init __start_xen(unsigned long mbi_p)
early_boot = 0;
+ softirq_init();
+
early_cpu_init();
paging_init();
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index 57135940bf..ccefc50cf2 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -40,7 +40,7 @@ string_param("clocksource", opt_clocksource);
unsigned long cpu_khz; /* CPU clock frequency in kHz. */
unsigned long hpet_address;
DEFINE_SPINLOCK(rtc_lock);
-volatile unsigned long jiffies;
+unsigned long pit0_ticks;
static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
static DEFINE_SPINLOCK(wc_lock);
@@ -67,19 +67,16 @@ struct platform_timesource {
static DEFINE_PER_CPU(struct cpu_time, cpu_time);
/*
- * Protected by platform_timer_lock, which must be acquired with interrupts
- * disabled because plt_overflow() is called from PIT ch0 interrupt context.
- */
-static s_time_t stime_platform_stamp;
-static u64 platform_timer_stamp;
-static DEFINE_SPINLOCK(platform_timer_lock);
-
-/*
- * Folding platform timer into 64-bit software counter is a really critical
- * operation! We therefore do it directly in PIT ch0 interrupt handler.
+ * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
+ * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
+ * softirq handling will happen in time.
+ *
+ * The pit_lock protects the 16- and 32-bit stamp fields as well as the
+ * PIT channel 2 latch-and-read sequence that updates them.
*/
-static u32 plt_overflow_jiffies;
-static void plt_overflow(void);
+static DEFINE_SPINLOCK(pit_lock);
+static u16 pit_stamp16;
+static u32 pit_stamp32;
+static int using_pit;
/*
* 32-bit division of integer dividend and integer divisor yielding
@@ -146,22 +143,36 @@ static inline u64 scale_delta(u64 delta, struct time_scale *scale)
return product;
}
-void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
+static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
{
ASSERT(local_irq_is_enabled());
- /* Update jiffies counter. */
- (*(volatile unsigned long *)&jiffies)++;
+ /* Only for start-of-day interrupt tests in io_apic.c. */
+ (*(volatile unsigned long *)&pit0_ticks)++;
/* Rough hack to allow accurate timers to sort-of-work with no APIC. */
if ( !cpu_has_apic )
raise_softirq(TIMER_SOFTIRQ);
- if ( --plt_overflow_jiffies == 0 )
- plt_overflow();
+ /* Emulate a 32-bit PIT counter. */
+ if ( using_pit )
+ {
+ u16 count;
+
+ spin_lock_irq(&pit_lock);
+
+ outb(0x80, PIT_MODE);
+ count = inb(PIT_CH2);
+ count |= inb(PIT_CH2) << 8;
+
+ pit_stamp32 += (u16)(pit_stamp16 - count);
+ pit_stamp16 = count;
+
+ spin_unlock_irq(&pit_lock);
+ }
}
-static struct irqaction irq0 = { timer_interrupt, "timer", NULL};
+static struct irqaction irq0 = { timer_interrupt, "timer", NULL };
/* ------ Calibrate the TSC -------
* Return processor ticks per second / CALIBRATE_FRAC.
@@ -295,12 +306,21 @@ static char *freq_string(u64 freq)
static u32 read_pit_count(void)
{
- u16 count;
- ASSERT(spin_is_locked(&platform_timer_lock));
+ u16 count16;
+ u32 count32;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pit_lock, flags);
+
outb(0x80, PIT_MODE);
- count = inb(PIT_CH2);
- count |= inb(PIT_CH2) << 8;
- return ~count;
+ count16 = inb(PIT_CH2);
+ count16 |= inb(PIT_CH2) << 8;
+
+ count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);
+
+ spin_unlock_irqrestore(&pit_lock, flags);
+
+ return count32;
}
static void init_pit(struct platform_timesource *pts)
@@ -308,7 +328,8 @@ static void init_pit(struct platform_timesource *pts)
pts->name = "PIT";
pts->frequency = CLOCK_TICK_RATE;
pts->read_counter = read_pit_count;
- pts->counter_bits = 16;
+ pts->counter_bits = 32;
+ using_pit = 1;
}
/************************************************************
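Note: the 32-bit PIT emulation above relies on unsigned 16-bit wraparound. The PIT counts down, so (u16)(pit_stamp16 - count) is the number of ticks elapsed since the previous sample even across a reload, as long as samples arrive more than once per 16-bit period (65536 / 1193182 Hz, roughly 55 ms -- the "~50ms" mentioned in the comment earlier in this file). A self-contained demonstration of that accumulation, with made-up sample values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t stamp32 = 0;
    uint16_t stamp16 = 100;                        /* previous latched value */
    uint16_t samples[] = { 60, 10, 65500, 65000 }; /* counter wraps after 10 */

    for (unsigned int i = 0; i < 4; i++) {
        uint16_t count = samples[i];
        stamp32 += (uint16_t)(stamp16 - count);    /* elapsed ticks, wrap-safe */
        stamp16 = count;
        printf("count=%5u  stamp32=%u\n", count, stamp32);
    }
    return 0;   /* prints stamp32 = 40, 90, 136, 636 */
}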
@@ -466,24 +487,28 @@ static int init_pmtimer(struct platform_timesource *pts)
static struct platform_timesource plt_src; /* details of chosen timesource */
static u32 plt_mask; /* hardware-width mask */
-static u32 plt_overflow_period; /* jiffies between calls to plt_overflow() */
+static u64 plt_overflow_period; /* ns between calls to plt_overflow() */
static struct time_scale plt_scale; /* scale: platform counter -> nanosecs */
/* Protected by platform_timer_lock. */
-static u64 plt_count64; /* 64-bit platform counter stamp */
-static u32 plt_count; /* hardware-width platform counter stamp */
+static DEFINE_SPINLOCK(platform_timer_lock);
+static s_time_t stime_platform_stamp; /* System time at below platform time */
+static u64 platform_timer_stamp; /* Platform time at above system time */
+static u64 plt_stamp64; /* 64-bit platform counter stamp */
+static u32 plt_stamp; /* hardware-width platform counter stamp */
+static struct timer plt_overflow_timer;
-static void plt_overflow(void)
+static void plt_overflow(void *unused)
{
u32 count;
- unsigned long flags;
- spin_lock_irqsave(&platform_timer_lock, flags);
+ spin_lock(&platform_timer_lock);
count = plt_src.read_counter();
- plt_count64 += (count - plt_count) & plt_mask;
- plt_count = count;
- plt_overflow_jiffies = plt_overflow_period;
- spin_unlock_irqrestore(&platform_timer_lock, flags);
+ plt_stamp64 += (count - plt_stamp) & plt_mask;
+ plt_stamp = count;
+ spin_unlock(&platform_timer_lock);
+
+ set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
}
static s_time_t __read_platform_stime(u64 platform_time)
@@ -497,12 +522,11 @@ static s_time_t read_platform_stime(void)
{
u64 count;
s_time_t stime;
- unsigned long flags;
- spin_lock_irqsave(&platform_timer_lock, flags);
- count = plt_count64 + ((plt_src.read_counter() - plt_count) & plt_mask);
+ spin_lock(&platform_timer_lock);
+ count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stime = __read_platform_stime(count);
- spin_unlock_irqrestore(&platform_timer_lock, flags);
+ spin_unlock(&platform_timer_lock);
return stime;
}
@@ -511,27 +535,25 @@ static void platform_time_calibration(void)
{
u64 count;
s_time_t stamp;
- unsigned long flags;
- spin_lock_irqsave(&platform_timer_lock, flags);
- count = plt_count64 + ((plt_src.read_counter() - plt_count) & plt_mask);
+ spin_lock(&platform_timer_lock);
+ count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stamp = __read_platform_stime(count);
stime_platform_stamp = stamp;
platform_timer_stamp = count;
- spin_unlock_irqrestore(&platform_timer_lock, flags);
+ spin_unlock(&platform_timer_lock);
}
static void resume_platform_timer(void)
{
/* No change in platform_stime across suspend/resume. */
- platform_timer_stamp = plt_count64;
- plt_count = plt_src.read_counter();
+ platform_timer_stamp = plt_stamp64;
+ plt_stamp = plt_src.read_counter();
}
static void init_platform_timer(void)
{
struct platform_timesource *pts = &plt_src;
- u64 overflow_period;
int rc = -1;
if ( opt_clocksource[0] != '\0' )
@@ -561,13 +583,12 @@ static void init_platform_timer(void)
set_time_scale(&plt_scale, pts->frequency);
- overflow_period = scale_delta(1ull << (pts->counter_bits-1), &plt_scale);
- do_div(overflow_period, MILLISECS(1000/HZ));
- plt_overflow_period = overflow_period;
- plt_overflow();
- printk("Platform timer overflows in %d jiffies.\n", plt_overflow_period);
+ plt_overflow_period = scale_delta(
+ 1ull << (pts->counter_bits-1), &plt_scale);
+ init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+ plt_overflow(NULL);
- platform_timer_stamp = plt_count64;
+ platform_timer_stamp = plt_stamp64;
printk("Platform timer is %s %s\n",
freq_string(pts->frequency), pts->name);
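Note: plt_overflow_period as computed above is the time for half the counter range to elapse (scale_delta of 1 << (counter_bits-1)). For the simulated 32-bit PIT at CLOCK_TICK_RATE of about 1193182 Hz that is 2^31 / 1193182, roughly 1800 seconds between plt_overflow() runs, versus 2^15 / 1193182, roughly 27 ms, for the raw 16-bit counter -- which is why the old code had to fold the counter directly in the PIT ch0 interrupt handler, and why the new code can use an ordinary timer instead.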
@@ -969,6 +990,19 @@ void __init early_time_init(void)
setup_irq(0, &irq0);
}
+static int __init disable_pit_irq(void)
+{
+ if ( !using_pit && cpu_has_apic )
+ {
+ /* Disable PIT CH0 timer interrupt. */
+ outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0);
+ outb_p(0, PIT_CH0);
+ }
+ return 0;
+}
+__initcall(disable_pit_irq);
+
void send_timer_event(struct vcpu *v)
{
send_guest_vcpu_virq(v, VIRQ_TIMER);
@@ -1002,6 +1036,8 @@ int time_resume(void)
{
u64 tmp = init_pit_and_calibrate_tsc();
+ disable_pit_irq();
+
set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);
resume_platform_timer();
@@ -1019,7 +1055,7 @@ int time_resume(void)
int dom0_pit_access(struct ioreq *ioreq)
{
/* Is Xen using Channel 2? Then disallow direct dom0 access. */
- if ( plt_src.read_counter == read_pit_count )
+ if ( using_pit )
return 0;
switch ( ioreq->addr )
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 019e3e56cf..5e39c9b417 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1305,23 +1305,24 @@ static int read_gate_descriptor(unsigned int gate_sel,
const struct desc_struct *pdesc;
- pdesc = (const struct desc_struct *)(!(gate_sel & 4) ?
- GDT_VIRT_START(v) :
- LDT_VIRT_START(v))
- + (gate_sel >> 3);
- if ( gate_sel < 4 ||
- (gate_sel >= FIRST_RESERVED_GDT_BYTE && !(gate_sel & 4)) ||
+ pdesc = (const struct desc_struct *)
+ (!(gate_sel & 4) ? GDT_VIRT_START(v) : LDT_VIRT_START(v))
+ + (gate_sel >> 3);
+ if ( (gate_sel < 4) ||
+ ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
__get_user(desc, pdesc) )
return 0;
*sel = (desc.a >> 16) & 0x0000fffc;
*off = (desc.a & 0x0000ffff) | (desc.b & 0xffff0000);
*ar = desc.b & 0x0000ffff;
+
/*
* check_descriptor() clears the DPL field and stores the
* guest requested DPL in the selector's RPL field.
*/
- ASSERT(!(*ar & _SEGMENT_DPL));
+ if ( *ar & _SEGMENT_DPL )
+ return 0;
*ar |= (desc.a >> (16 - 13)) & _SEGMENT_DPL;
if ( !is_pv_32bit_vcpu(v) )
@@ -1352,7 +1353,7 @@ static int read_gate_descriptor(unsigned int gate_sel,
#endif
/* Has the guest requested sufficient permission for this I/O access? */
-static inline int guest_io_okay(
+static int guest_io_okay(
unsigned int port, unsigned int bytes,
struct vcpu *v, struct cpu_user_regs *regs)
{
@@ -1394,19 +1395,130 @@ static inline int guest_io_okay(
}
/* Has the administrator granted sufficient permission for this I/O access? */
-static inline int admin_io_okay(
+static int admin_io_okay(
unsigned int port, unsigned int bytes,
struct vcpu *v, struct cpu_user_regs *regs)
{
return ioports_access_permitted(v->domain, port, port + bytes - 1);
}
-#define guest_inb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
-#define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
-#define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
-#define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
-#define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
-#define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
+static uint32_t guest_io_read(
+ unsigned int port, unsigned int bytes,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ extern uint32_t pci_conf_read(
+ uint32_t cf8, uint8_t offset, uint8_t bytes);
+
+ uint32_t data = 0;
+ unsigned int shift = 0;
+
+ if ( admin_io_okay(port, bytes, v, regs) )
+ {
+ switch ( bytes )
+ {
+ case 1: return inb(port);
+ case 2: return inw(port);
+ case 4: return inl(port);
+ }
+ }
+
+ while ( bytes != 0 )
+ {
+ unsigned int size = 1;
+ uint32_t sub_data = 0xff;
+
+ if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
+ {
+ sub_data = pv_pit_handler(port, 0, 0);
+ }
+ else if ( (port & 0xfffc) == 0xcf8 )
+ {
+ size = min(bytes, 4 - (port & 3));
+ sub_data = v->domain->arch.pci_cf8 >> ((port & 3) * 8);
+ }
+ else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
+ {
+ size = min(bytes, 4 - (port & 3));
+ if ( size == 3 )
+ size = 2;
+ sub_data = pci_conf_read(v->domain->arch.pci_cf8, port & 3, size);
+ }
+
+ if ( size == 4 )
+ return sub_data;
+
+ data |= (sub_data & ((1u << (size * 8)) - 1)) << shift;
+ shift += size * 8;
+ port += size;
+ bytes -= size;
+ }
+
+ return data;
+}
+
+static void guest_io_write(
+ unsigned int port, unsigned int bytes, uint32_t data,
+ struct vcpu *v, struct cpu_user_regs *regs)
+{
+ extern void pci_conf_write(
+ uint32_t cf8, uint8_t offset, uint8_t bytes, uint32_t data);
+
+ if ( admin_io_okay(port, bytes, v, regs) )
+ {
+ switch ( bytes ) {
+ case 1:
+ outb((uint8_t)data, port);
+ if ( pv_post_outb_hook )
+ pv_post_outb_hook(port, (uint8_t)data);
+ break;
+ case 2:
+ outw((uint16_t)data, port);
+ break;
+ case 4:
+ outl(data, port);
+ break;
+ }
+ return;
+ }
+
+ while ( bytes != 0 )
+ {
+ unsigned int size = 1;
+
+ if ( (port == 0x42) || (port == 0x43) || (port == 0x61) )
+ {
+ pv_pit_handler(port, (uint8_t)data, 1);
+ }
+ else if ( (port & 0xfffc) == 0xcf8 )
+ {
+ size = min(bytes, 4 - (port & 3));
+ if ( size == 4 )
+ {
+ v->domain->arch.pci_cf8 = data;
+ }
+ else
+ {
+ uint32_t mask = ((1u << (size * 8)) - 1) << ((port & 3) * 8);
+ v->domain->arch.pci_cf8 &= ~mask;
+ v->domain->arch.pci_cf8 |= (data << ((port & 3) * 8)) & mask;
+ }
+ }
+ else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
+ {
+ size = min(bytes, 4 - (port & 3));
+ if ( size == 3 )
+ size = 2;
+ pci_conf_write(v->domain->arch.pci_cf8, port & 3, size, data);
+ }
+
+ if ( size == 4 )
+ return;
+
+ port += size;
+ bytes -= size;
+ data >>= size * 8;
+ }
+}
/* I/O emulation support. Helper routines for, and type of, the stack stub.*/
void host_to_guest_gpr_switch(struct cpu_user_regs *)
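Note: guest_io_read()/guest_io_write() above split an access into sub-accesses so the virtual CF8/CFC logic sees well-formed chunks. Worked example (hypothetical guest behaviour): a 2-byte IN from port 0xcfe by a privileged domain hits the 0xcfc branch with size = min(2, 4 - (0xcfe & 3)) = 2 and becomes pci_conf_read(pci_cf8, 2, 2), i.e. the top half of the selected config dword; a 4-byte IN from 0xcf8 simply returns the latched pci_cf8 value; any port with no handler contributes 0xff per byte, matching the ~0 behaviour removed from emulate_privileged_op() below.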
@@ -1525,7 +1637,7 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
/* REX prefix. */
if ( rex & 8 ) /* REX.W */
- op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */
+ op_bytes = 4; /* emulate only opcodes not supporting 64-bit operands */
modrm_reg = (rex & 4) << 1; /* REX.R */
/* REX.X does not need to be decoded. */
modrm_rm = (rex & 1) << 3; /* REX.B */
@@ -1554,7 +1666,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
{
if ( !read_descriptor(data_sel, v, regs,
&data_base, &data_limit, &ar,
- _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
+ _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|
+ _SEGMENT_P) )
goto fail;
if ( !(ar & _SEGMENT_S) ||
!(ar & _SEGMENT_P) ||
@@ -1601,69 +1714,39 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
case 0x6c: /* INSB */
op_bytes = 1;
case 0x6d: /* INSW/INSL */
- if ( data_limit < op_bytes - 1 ||
- rd_ad(edi) > data_limit - (op_bytes - 1) ||
+ if ( (data_limit < (op_bytes - 1)) ||
+ (rd_ad(edi) > (data_limit - (op_bytes - 1))) ||
!guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- switch ( op_bytes )
- {
- case 1:
- /* emulate PIT counter 2 */
- data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) :
- ((port == 0x42 || port == 0x43 || port == 0x61) ?
- pv_pit_handler(port, 0, 0) : ~0));
- break;
- case 2:
- data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0);
- break;
- case 4:
- data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0);
- break;
- }
- if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 )
+ data = guest_io_read(port, op_bytes, v, regs);
+ if ( (rc = copy_to_user((void *)data_base + rd_ad(edi),
+ &data, op_bytes)) != 0 )
{
propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc,
PFEC_write_access);
return EXCRET_fault_fixed;
}
- wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
+ wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF)
+ ? -op_bytes : op_bytes));
break;
case 0x6e: /* OUTSB */
op_bytes = 1;
case 0x6f: /* OUTSW/OUTSL */
- if ( data_limit < op_bytes - 1 ||
- rd_ad(esi) > data_limit - (op_bytes - 1) ||
- !guest_io_okay(port, op_bytes, v, regs) )
+ if ( (data_limit < (op_bytes - 1)) ||
+ (rd_ad(esi) > (data_limit - (op_bytes - 1))) ||
+ !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes);
- if ( rc != 0 )
+ if ( (rc = copy_from_user(&data, (void *)data_base + rd_ad(esi),
+ op_bytes)) != 0 )
{
- propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0);
+ propagate_page_fault(data_base + rd_ad(esi)
+ + op_bytes - rc, 0);
return EXCRET_fault_fixed;
}
- switch ( op_bytes )
- {
- case 1:
- if ( guest_outb_okay(port, v, regs) )
- {
- outb((u8)data, port);
- if ( pv_post_outb_hook )
- pv_post_outb_hook(port, data);
- }
- else if ( port == 0x42 || port == 0x43 || port == 0x61 )
- pv_pit_handler(port, data, 1);
- break;
- case 2:
- if ( guest_outw_okay(port, v, regs) )
- outw((u16)data, port);
- break;
- case 4:
- if ( guest_outl_okay(port, v, regs) )
- outl((u32)data, port);
- break;
- }
- wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
+ guest_io_write(port, op_bytes, data, v, regs);
+ wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF)
+ ? -op_bytes : op_bytes));
break;
}
@@ -1727,31 +1810,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
exec_in:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- switch ( op_bytes )
+ if ( admin_io_okay(port, op_bytes, v, regs) )
{
- case 1:
- if ( guest_inb_okay(port, v, regs) )
- io_emul(regs);
- else if ( port == 0x42 || port == 0x43 || port == 0x61 )
- {
- regs->eax &= ~0xffUL;
- regs->eax |= pv_pit_handler(port, 0, 0);
- }
- else
- regs->eax |= (u8)~0;
- break;
- case 2:
- if ( guest_inw_okay(port, v, regs) )
- io_emul(regs);
- else
- regs->eax |= (u16)~0;
- break;
- case 4:
- if ( guest_inl_okay(port, v, regs) )
- io_emul(regs);
+ io_emul(regs);
+ }
+ else
+ {
+ if ( op_bytes == 4 )
+ regs->eax = 0;
else
- regs->eax = (u32)~0;
- break;
+ regs->eax &= ~((1u << (op_bytes * 8)) - 1);
+ regs->eax |= guest_io_read(port, op_bytes, v, regs);
}
bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
goto done;
@@ -1770,26 +1839,15 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
exec_out:
if ( !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- switch ( op_bytes )
+ if ( admin_io_okay(port, op_bytes, v, regs) )
{
- case 1:
- if ( guest_outb_okay(port, v, regs) )
- {
- io_emul(regs);
- if ( pv_post_outb_hook )
- pv_post_outb_hook(port, regs->eax);
- }
- else if ( port == 0x42 || port == 0x43 || port == 0x61 )
- pv_pit_handler(port, regs->eax, 1);
- break;
- case 2:
- if ( guest_outw_okay(port, v, regs) )
- io_emul(regs);
- break;
- case 4:
- if ( guest_outl_okay(port, v, regs) )
- io_emul(regs);
- break;
+ io_emul(regs);
+ if ( (op_bytes == 1) && pv_post_outb_hook )
+ pv_post_outb_hook(port, regs->eax);
+ }
+ else
+ {
+ guest_io_write(port, op_bytes, regs->eax, v, regs);
}
bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
goto done;
@@ -1921,14 +1979,14 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
break;
case 3: /* Write CR3 */
- LOCK_BIGLOCK(v->domain);
+ domain_lock(v->domain);
if ( !is_pv_32on64_vcpu(v) )
rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
#ifdef CONFIG_COMPAT
else
rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));
#endif
- UNLOCK_BIGLOCK(v->domain);
+ domain_unlock(v->domain);
if ( rc == 0 ) /* not okay */
goto fail;
break;
@@ -2137,8 +2195,8 @@ static void emulate_gate_op(struct cpu_user_regs *regs)
/* Check whether this fault is due to the use of a call gate. */
if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
- ((ar >> 13) & 3) < (regs->cs & 3) ||
- (ar & _SEGMENT_TYPE) != 0xc00 )
+ (((ar >> 13) & 3) < (regs->cs & 3)) ||
+ ((ar & _SEGMENT_TYPE) != 0xc00) )
{
do_guest_trap(TRAP_gp_fault, regs, 1);
return;
@@ -2232,15 +2290,18 @@ static void emulate_gate_op(struct cpu_user_regs *regs)
{
if ( (modrm & 7) == 4 )
{
- unsigned int sib = insn_fetch(u8, base, eip, limit);
+ unsigned int sib;
+ sib = insn_fetch(u8, base, eip, limit);
modrm = (modrm & ~7) | (sib & 7);
if ( (sib >>= 3) != 4 )
- opnd_off = *(unsigned long *)decode_register(sib & 7, regs, 0);
+ opnd_off = *(unsigned long *)
+ decode_register(sib & 7, regs, 0);
opnd_off <<= sib >> 3;
}
if ( (modrm & 7) != 5 || (modrm & 0xc0) )
- opnd_off += *(unsigned long *)decode_register(modrm & 7, regs, 0);
+ opnd_off += *(unsigned long *)
+ decode_register(modrm & 7, regs, 0);
else
modrm |= 0x87;
if ( !opnd_sel )
@@ -2576,12 +2637,14 @@ asmlinkage void do_general_protection(struct cpu_user_regs *regs)
panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
}
-static void nmi_softirq(void)
+static void nmi_action(unsigned long unused)
{
/* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
vcpu_kick(dom0->vcpu[0]);
}
+static DECLARE_TASKLET(nmi_tasklet, nmi_action, 0);
+
static void nmi_dom0_report(unsigned int reason_idx)
{
struct domain *d;
@@ -2593,7 +2656,7 @@ static void nmi_dom0_report(unsigned int reason_idx)
set_bit(reason_idx, nmi_reason(d));
if ( !test_and_set_bool(v->nmi_pending) )
- raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
+ tasklet_schedule(&nmi_tasklet); /* not safe to wake a vcpu here */
}
asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
@@ -2871,8 +2934,6 @@ void __init trap_init(void)
percpu_traps_init();
cpu_init();
-
- open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
long register_guest_nmi_callback(unsigned long address)
diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
index 256f7a5ac8..a1de1bab27 100644
--- a/xen/arch/x86/x86_64/compat/mm.c
+++ b/xen/arch/x86/x86_64/compat/mm.c
@@ -28,12 +28,12 @@ int compat_set_gdt(XEN_GUEST_HANDLE(uint) frame_list, unsigned int entries)
guest_handle_add_offset(frame_list, 1);
}
- LOCK_BIGLOCK(current->domain);
+ domain_lock(current->domain);
if ( (ret = set_gdt(current, frames, entries)) == 0 )
flush_tlb_local();
- UNLOCK_BIGLOCK(current->domain);
+ domain_unlock(current->domain);
return ret;
}
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index f9f33e0a88..3d79657989 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -59,7 +59,7 @@ void *alloc_xen_pagetable(void)
if ( !early_boot )
{
- struct page_info *pg = alloc_domheap_page(NULL);
+ struct page_info *pg = alloc_domheap_page(NULL, 0);
BUG_ON(pg == NULL);
return page_to_virt(pg);
}
@@ -108,7 +108,7 @@ void __init paging_init(void)
struct page_info *l1_pg, *l2_pg, *l3_pg;
/* Create user-accessible L2 directory to map the MPT for guests. */
- if ( (l3_pg = alloc_domheap_page(NULL)) == NULL )
+ if ( (l3_pg = alloc_domheap_page(NULL, 0)) == NULL )
goto nomem;
l3_ro_mpt = page_to_virt(l3_pg);
clear_page(l3_ro_mpt);
@@ -134,7 +134,7 @@ void __init paging_init(void)
1UL << L2_PAGETABLE_SHIFT);
if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
{
- if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
+ if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
goto nomem;
va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
l2_ro_mpt = page_to_virt(l2_pg);
@@ -154,7 +154,7 @@ void __init paging_init(void)
l4_table_offset(HIRO_COMPAT_MPT_VIRT_START));
l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(
HIRO_COMPAT_MPT_VIRT_START)]);
- if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
+ if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
goto nomem;
compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
clear_page(l2_ro_mpt);
diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c
index 002a8a8362..9e58cb145c 100644
--- a/xen/common/compat/domain.c
+++ b/xen/common/compat/domain.c
@@ -42,11 +42,11 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
break;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
rc = -EEXIST;
if ( !v->is_initialised )
rc = boot_vcpu(d, vcpuid, cmp_ctxt);
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
xfree(cmp_ctxt);
break;
diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c
index 882d435a65..8781a331cf 100644
--- a/xen/common/compat/grant_table.c
+++ b/xen/common/compat/grant_table.c
@@ -109,12 +109,24 @@ int compat_grant_table_op(unsigned int cmd,
rc = -EFAULT;
else
{
- BUG_ON((COMPAT_ARG_XLAT_SIZE - sizeof(*nat.setup)) / sizeof(*nat.setup->frame_list.p) < max_nr_grant_frames);
+ unsigned int max_frame_list_size_in_page =
+ (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.setup)) /
+ sizeof(*nat.setup->frame_list.p);
+ if ( max_frame_list_size_in_page < max_nr_grant_frames )
+ {
+ gdprintk(XENLOG_WARNING,
+ "max_nr_grant_frames is too large (%u,%u)\n",
+ max_nr_grant_frames, max_frame_list_size_in_page);
+ rc = -EINVAL;
+ }
+ else
+ {
#define XLAT_gnttab_setup_table_HNDL_frame_list(_d_, _s_) \
- set_xen_guest_handle((_d_)->frame_list, (unsigned long *)(nat.setup + 1))
- XLAT_gnttab_setup_table(nat.setup, &cmp.setup);
+ set_xen_guest_handle((_d_)->frame_list, (unsigned long *)(nat.setup + 1))
+ XLAT_gnttab_setup_table(nat.setup, &cmp.setup);
#undef XLAT_gnttab_setup_table_HNDL_frame_list
- rc = gnttab_setup_table(guest_handle_cast(nat.uop, gnttab_setup_table_t), 1);
+ rc = gnttab_setup_table(guest_handle_cast(nat.uop, gnttab_setup_table_t), 1);
+ }
}
if ( rc == 0 )
{
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 76b48f4296..c74fb07c0e 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -80,7 +80,7 @@ struct domain *alloc_domain(domid_t domid)
}
atomic_set(&d->refcnt, 1);
- spin_lock_init(&d->big_lock);
+ spin_lock_init(&d->domain_lock);
spin_lock_init(&d->page_alloc_lock);
spin_lock_init(&d->shutdown_lock);
spin_lock_init(&d->hypercall_deadlock_mutex);
@@ -629,7 +629,7 @@ int vcpu_reset(struct vcpu *v)
int rc;
domain_pause(d);
- LOCK_BIGLOCK(d);
+ domain_lock(d);
rc = arch_vcpu_reset(v);
if ( rc != 0 )
@@ -646,7 +646,7 @@ int vcpu_reset(struct vcpu *v)
clear_bit(_VPF_blocked, &v->pause_flags);
out:
- UNLOCK_BIGLOCK(v->domain);
+ domain_unlock(v->domain);
domain_unpause(d);
return rc;
@@ -678,11 +678,11 @@ long do_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
return -EFAULT;
}
- LOCK_BIGLOCK(d);
+ domain_lock(d);
rc = -EEXIST;
if ( !v->is_initialised )
rc = boot_vcpu(d, vcpuid, ctxt);
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
xfree(ctxt);
break;
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
index 52143dbd1d..19ea5bef1a 100644
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -25,6 +25,8 @@
#include <public/domctl.h>
#include <xsm/xsm.h>
+DEFINE_SPINLOCK(domctl_lock);
+
extern long arch_do_domctl(
struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
@@ -90,7 +92,7 @@ static inline int is_free_domid(domid_t dom)
void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
- struct vcpu *v;
+ struct vcpu *v;
u64 cpu_time = 0;
int flags = XEN_DOMINF_blocked;
struct vcpu_runstate_info runstate;
@@ -119,7 +121,7 @@ void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
info->cpu_time = cpu_time;
- info->flags = flags |
+ info->flags = (info->nr_online_vcpus ? flags : 0) |
((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying : 0) |
(d->is_shut_down ? XEN_DOMINF_shutdown : 0) |
(d->is_paused_by_controller ? XEN_DOMINF_paused : 0) |
@@ -180,7 +182,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl)
{
long ret = 0;
struct xen_domctl curop, *op = &curop;
- static DEFINE_SPINLOCK(domctl_lock);
if ( !IS_PRIV(current->domain) )
return -EPERM;
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index b385b54738..70b34fbd2f 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -109,9 +109,7 @@ static int get_free_port(struct domain *d)
if ( xsm_alloc_security_evtchn(&chn[i]) )
{
for ( j = 0; j < i; j++ )
- {
xsm_free_security_evtchn(&chn[j]);
- }
xfree(chn);
return -ENOMEM;
}
@@ -971,9 +969,18 @@ void free_xen_event_channel(
struct domain *d = local_vcpu->domain;
spin_lock(&d->evtchn_lock);
+
+ if ( unlikely(d->is_dying) )
+ {
+ spin_unlock(&d->evtchn_lock);
+ return;
+ }
+
+ BUG_ON(!port_is_valid(d, port));
chn = evtchn_from_port(d, port);
BUG_ON(!chn->consumer_is_xen);
chn->consumer_is_xen = 0;
+
spin_unlock(&d->evtchn_lock);
(void)__evtchn_close(d, port);
@@ -1035,6 +1042,7 @@ void evtchn_destroy(struct domain *d)
{
xsm_free_security_evtchn(d->evtchn[i]);
xfree(d->evtchn[i]);
+ d->evtchn[i] = NULL;
}
spin_unlock(&d->evtchn_lock);
}
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 2dbcfab8b6..53662df865 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -1102,7 +1102,7 @@ gnttab_transfer(
struct page_info *new_page;
void *sp, *dp;
- new_page = alloc_domheap_pages(NULL, 0, MEMF_bits(max_bitsize));
+ new_page = alloc_domheap_page(NULL, MEMF_bits(max_bitsize));
if ( new_page == NULL )
{
gop.status = GNTST_address_too_big;
@@ -1445,7 +1445,7 @@ do_grant_table_op(
if ( count > 512 )
return -EINVAL;
- LOCK_BIGLOCK(d);
+ domain_lock(d);
rc = -EFAULT;
switch ( cmd )
@@ -1516,7 +1516,7 @@ do_grant_table_op(
}
out:
- UNLOCK_BIGLOCK(d);
+ domain_unlock(d);
return rc;
}
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index b414452562..934edb5dc9 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -32,7 +32,7 @@ static struct {
static unsigned char keypress_key;
-static void keypress_softirq(void)
+static void keypress_action(unsigned long unused)
{
keyhandler_t *h;
unsigned char key = keypress_key;
@@ -42,6 +42,8 @@ static void keypress_softirq(void)
console_end_log_everything();
}
+static DECLARE_TASKLET(keypress_tasklet, keypress_action, 0);
+
void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
{
irq_keyhandler_t *h;
@@ -56,7 +58,7 @@ void handle_keypress(unsigned char key, struct cpu_user_regs *regs)
else
{
keypress_key = key;
- raise_softirq(KEYPRESS_SOFTIRQ);
+ tasklet_schedule(&keypress_tasklet);
}
}
@@ -284,8 +286,6 @@ static void do_debug_key(unsigned char key, struct cpu_user_regs *regs)
void __init initialize_keytable(void)
{
- open_softirq(KEYPRESS_SOFTIRQ, keypress_softirq);
-
register_irq_keyhandler(
'd', dump_registers, "dump registers");
register_keyhandler(
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 70a05d5367..3e9f7071ab 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -21,6 +21,7 @@
#include <xen/errno.h>
#include <asm/current.h>
#include <asm/hardirq.h>
+#include <xen/numa.h>
#include <public/memory.h>
#include <xsm/xsm.h>
@@ -37,19 +38,13 @@ struct memop_args {
int preempted; /* Was the hypercall preempted? */
};
-static unsigned int select_local_cpu(struct domain *d)
-{
- struct vcpu *v = d->vcpu[0];
- return (v ? v->processor : 0);
-}
-
static void increase_reservation(struct memop_args *a)
{
struct page_info *page;
unsigned long i;
xen_pfn_t mfn;
struct domain *d = a->domain;
- unsigned int cpu = select_local_cpu(d);
+ unsigned int node = domain_to_node(d);
if ( !guest_handle_is_null(a->extent_list) &&
!guest_handle_okay(a->extent_list, a->nr_extents) )
@@ -67,7 +62,8 @@ static void increase_reservation(struct memop_args *a)
goto out;
}
- page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
+ page = alloc_domheap_pages(
+ d, a->extent_order, a->memflags | MEMF_node(node));
if ( unlikely(page == NULL) )
{
gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
@@ -96,7 +92,7 @@ static void populate_physmap(struct memop_args *a)
unsigned long i, j;
xen_pfn_t gpfn, mfn;
struct domain *d = a->domain;
- unsigned int cpu = select_local_cpu(d);
+ unsigned int node = domain_to_node(d);
if ( !guest_handle_okay(a->extent_list, a->nr_extents) )
return;
@@ -116,7 +112,8 @@ static void populate_physmap(struct memop_args *a)
if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
goto out;
- page = __alloc_domheap_pages(d, cpu, a->extent_order, a->memflags);
+ page = alloc_domheap_pages(
+ d, a->extent_order, a->memflags | MEMF_node(node));
if ( unlikely(page == NULL) )
{
gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
@@ -296,7 +293,7 @@ static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
unsigned long in_chunk_order, out_chunk_order;
xen_pfn_t gpfn, gmfn, mfn;
unsigned long i, j, k;
- unsigned int memflags = 0, cpu;
+ unsigned int memflags = 0;
long rc = 0;
struct domain *d;
struct page_info *page;
@@ -351,8 +348,7 @@ static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
d, exch.out.address_bits ? : (BITS_PER_LONG+PAGE_SHIFT)));
-
- cpu = select_local_cpu(d);
+ memflags |= MEMF_node(domain_to_node(d));
for ( i = (exch.nr_exchanged >> in_chunk_order);
i < (exch.in.nr_extents >> in_chunk_order);
@@ -401,8 +397,7 @@ static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
/* Allocate a chunk's worth of anonymous output pages. */
for ( j = 0; j < (1UL << out_chunk_order); j++ )
{
- page = __alloc_domheap_pages(
- NULL, cpu, exch.out.extent_order, memflags);
+ page = alloc_domheap_pages(NULL, exch.out.extent_order, memflags);
if ( unlikely(page == NULL) )
{
rc = -ENOMEM;
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 2d5f3f6da3..d1773c679e 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -36,6 +36,7 @@
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <asm/page.h>
+#include <asm/numa.h>
#include <asm/flushtlb.h>
/*
@@ -328,14 +329,17 @@ static void init_node_heap(int node)
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
unsigned int zone_lo, unsigned int zone_hi,
- unsigned int cpu, unsigned int order)
+ unsigned int node, unsigned int order)
{
unsigned int i, j, zone;
- unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
+ unsigned int num_nodes = num_online_nodes();
unsigned long request = 1UL << order;
cpumask_t extra_cpus_mask, mask;
struct page_info *pg;
+ if ( node == NUMA_NO_NODE )
+ node = cpu_to_node(smp_processor_id());
+
ASSERT(node >= 0);
ASSERT(node < num_nodes);
ASSERT(zone_lo <= zone_hi);
@@ -670,7 +674,8 @@ void *alloc_xenheap_pages(unsigned int order)
ASSERT(!in_irq());
- pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
+ pg = alloc_heap_pages(
+ MEMZONE_XEN, MEMZONE_XEN, cpu_to_node(smp_processor_id()), order);
if ( unlikely(pg == NULL) )
goto no_memory;
@@ -778,12 +783,12 @@ int assign_pages(
}
-struct page_info *__alloc_domheap_pages(
- struct domain *d, unsigned int cpu, unsigned int order,
- unsigned int memflags)
+struct page_info *alloc_domheap_pages(
+ struct domain *d, unsigned int order, unsigned int memflags)
{
struct page_info *pg = NULL;
unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
+ unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1);
ASSERT(!in_irq());
@@ -797,7 +802,7 @@ struct page_info *__alloc_domheap_pages(
if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
{
- pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
+ pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, node, order);
/* Failure? Then check if we can fall back to the DMA pool. */
if ( unlikely(pg == NULL) &&
@@ -811,7 +816,7 @@ struct page_info *__alloc_domheap_pages(
if ( (pg == NULL) &&
((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
- cpu, order)) == NULL) )
+ node, order)) == NULL) )
return NULL;
if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
@@ -823,12 +828,6 @@ struct page_info *__alloc_domheap_pages(
return pg;
}
-struct page_info *alloc_domheap_pages(
- struct domain *d, unsigned int order, unsigned int flags)
-{
- return __alloc_domheap_pages(d, smp_processor_id(), order, flags);
-}
-
void free_domheap_pages(struct page_info *pg, unsigned int order)
{
int i, drop_dom_ref;
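Note: the target node now travels in memflags. The decode above, (uint8_t)((memflags >> _MEMF_node) - 1), implies that MEMF_node(n) stores n+1, so an all-zero memflags decodes to 0xff and (assuming NUMA_NO_NODE is 0xff) alloc_heap_pages() falls back to the current CPU's node. A self-contained sketch of that round trip; the shift position and the NUMA_NO_NODE value are assumptions for illustration only:

#include <stdint.h>
#include <stdio.h>

#define _MEMF_node   8                         /* assumed bit position */
#define MEMF_node(n) ((((n) + 1) & 0xff) << _MEMF_node)
#define NUMA_NO_NODE 0xff                      /* assumed value */

int main(void)
{
    unsigned int with_node = MEMF_node(2);     /* caller asked for node 2 */
    unsigned int no_node   = 0;                /* caller expressed no preference */

    uint8_t a = (uint8_t)((with_node >> _MEMF_node) - 1);  /* -> 2    */
    uint8_t b = (uint8_t)((no_node   >> _MEMF_node) - 1);  /* -> 0xff */

    printf("%u %s\n", a, (b == NUMA_NO_NODE) ? "NUMA_NO_NODE" : "?");
    return 0;
}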
diff --git a/xen/common/softirq.c b/xen/common/softirq.c
index b6c2b18886..be4728f2e0 100644
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -52,6 +52,108 @@ void open_softirq(int nr, softirq_handler handler)
softirq_handlers[nr] = handler;
}
+static LIST_HEAD(tasklet_list);
+static DEFINE_SPINLOCK(tasklet_lock);
+
+void tasklet_schedule(struct tasklet *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tasklet_lock, flags);
+
+ if ( !t->is_dead )
+ {
+ if ( !t->is_scheduled && !t->is_running )
+ {
+ BUG_ON(!list_empty(&t->list));
+ list_add_tail(&t->list, &tasklet_list);
+ }
+ t->is_scheduled = 1;
+ raise_softirq(TASKLET_SOFTIRQ);
+ }
+
+ spin_unlock_irqrestore(&tasklet_lock, flags);
+}
+
+static void tasklet_action(void)
+{
+ struct tasklet *t;
+
+ spin_lock_irq(&tasklet_lock);
+
+ if ( list_empty(&tasklet_list) )
+ {
+ spin_unlock_irq(&tasklet_lock);
+ return;
+ }
+
+ t = list_entry(tasklet_list.next, struct tasklet, list);
+ list_del_init(&t->list);
+
+ BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
+ t->is_scheduled = 0;
+ t->is_running = 1;
+
+ spin_unlock_irq(&tasklet_lock);
+ t->func(t->data);
+ spin_lock_irq(&tasklet_lock);
+
+ t->is_running = 0;
+
+ if ( t->is_scheduled )
+ {
+ BUG_ON(t->is_dead || !list_empty(&t->list));
+ list_add_tail(&t->list, &tasklet_list);
+ }
+
+ /*
+ * If there is more work to do then reschedule. We don't grab more work
+ * immediately as we want to allow other softirq work to happen first.
+ */
+ if ( !list_empty(&tasklet_list) )
+ raise_softirq(TASKLET_SOFTIRQ);
+
+ spin_unlock_irq(&tasklet_lock);
+}
+
+void tasklet_kill(struct tasklet *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tasklet_lock, flags);
+
+ if ( !list_empty(&t->list) )
+ {
+ BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
+ list_del_init(&t->list);
+ }
+ t->is_scheduled = 0;
+ t->is_dead = 1;
+
+ while ( t->is_running )
+ {
+ spin_unlock_irqrestore(&tasklet_lock, flags);
+ cpu_relax();
+ spin_lock_irqsave(&tasklet_lock, flags);
+ }
+
+ spin_unlock_irqrestore(&tasklet_lock, flags);
+}
+
+void tasklet_init(
+ struct tasklet *t, void (*func)(unsigned long), unsigned long data)
+{
+ memset(t, 0, sizeof(*t));
+ INIT_LIST_HEAD(&t->list);
+ t->func = func;
+ t->data = data;
+}
+
+void __init softirq_init(void)
+{
+ open_softirq(TASKLET_SOFTIRQ, tasklet_action);
+}
+
/*
* Local variables:
* mode: C
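Note: the tasklet layer above replaces the one-shot NMI_SOFTIRQ and KEYPRESS_SOFTIRQ handlers converted earlier in this patch. A minimal usage sketch against just the interface introduced here (DECLARE_TASKLET as used in traps.c and keyhandler.c, plus tasklet_schedule/tasklet_kill); the handler and its helper are hypothetical:

/* Sketch only -- not part of this patch. */
static void my_action(unsigned long data)
{
    /* Runs from TASKLET_SOFTIRQ context, one instance at a time. */
    process_deferred_work(data);               /* hypothetical helper */
}

static DECLARE_TASKLET(my_tasklet, my_action, 0);

void my_irq_handler(void)
{
    /* Safe from interrupt context; re-scheduling while my_action() is
     * running queues exactly one further run. */
    tasklet_schedule(&my_tasklet);
}

void my_teardown(void)
{
    /* Unlink, mark dead, and wait for any in-progress run to finish. */
    tasklet_kill(&my_tasklet);
}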
diff --git a/xen/common/trace.c b/xen/common/trace.c
index 863816b9ee..12ff7a029c 100644
--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -69,12 +69,6 @@ static cpumask_t tb_cpu_mask = CPU_MASK_ALL;
/* which tracing events are enabled */
static u32 tb_event_mask = TRC_ALL;
-static void trace_notify_guest(void)
-{
- send_guest_global_virq(dom0, VIRQ_TBUF);
-}
-
-
/**
* alloc_trace_bufs - performs initialization of the per-cpu trace buffers.
*
@@ -120,7 +114,6 @@ static int alloc_trace_bufs(void)
}
t_buf_highwater = data_size >> 1; /* 50% high water */
- open_softirq(TRACE_SOFTIRQ, trace_notify_guest);
return 0;
}
@@ -513,7 +506,7 @@ void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
/* Notify trace buffer consumer that we've crossed the high water mark. */
if ( started_below_highwater &&
(calc_unconsumed_bytes(buf) >= t_buf_highwater) )
- raise_softirq(TRACE_SOFTIRQ);
+ send_guest_global_virq(dom0, VIRQ_TBUF);
}
/*
diff --git a/xen/common/xencomm.c b/xen/common/xencomm.c
index fc86cd0830..135e3d93a5 100644
--- a/xen/common/xencomm.c
+++ b/xen/common/xencomm.c
@@ -323,6 +323,7 @@ xencomm_copy_chunk_to(
(unsigned long)xencomm_vaddr(paddr, page));
memcpy(xencomm_vaddr(paddr, page), (void *)from, len);
+ xencomm_mark_dirty(xencomm_vaddr(paddr, page), len);
put_page(page);
return 0;
diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c
index 96ad02bbf7..d7ffa4e216 100644
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -21,9 +21,9 @@
#include <xen/config.h>
#include <xen/errno.h>
#include <xen/iommu.h>
+#include <xen/pci.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
-#include "../pci-direct.h"
#include "../pci_regs.h"
static int __init valid_bridge_bus_config(
@@ -31,9 +31,9 @@ static int __init valid_bridge_bus_config(
{
int pri_bus;
- pri_bus = read_pci_config_byte(bus, dev, func, PCI_PRIMARY_BUS);
- *sec_bus = read_pci_config_byte(bus, dev, func, PCI_SECONDARY_BUS);
- *sub_bus = read_pci_config_byte(bus, dev, func, PCI_SUBORDINATE_BUS);
+ pri_bus = pci_conf_read8(bus, dev, func, PCI_PRIMARY_BUS);
+ *sec_bus = pci_conf_read8(bus, dev, func, PCI_SECONDARY_BUS);
+ *sub_bus = pci_conf_read8(bus, dev, func, PCI_SUBORDINATE_BUS);
return ((pri_bus == bus) && (*sec_bus > bus) && (*sub_bus >= *sec_bus));
}
@@ -59,12 +59,11 @@ int __init get_iommu_last_downstream_bus(struct amd_iommu *iommu)
}
func = PCI_FUNC(devfn);
- if ( !VALID_PCI_VENDOR_ID(
- read_pci_config_16(bus, dev, func, PCI_VENDOR_ID)) )
+ if ( !VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
+ PCI_VENDOR_ID)) )
continue;
- hdr_type = read_pci_config_byte(bus, dev, func,
- PCI_HEADER_TYPE);
+ hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
if ( func == 0 )
multi_func = IS_PCI_MULTI_FUNCTION(hdr_type);
@@ -92,9 +91,9 @@ int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
u32 cap_header, cap_range, misc_info;
u64 mmio_bar;
- mmio_bar = (u64)read_pci_config(
+ mmio_bar = (u64)pci_conf_read32(
bus, dev, func, cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
- mmio_bar |= read_pci_config(bus, dev, func,
+ mmio_bar |= pci_conf_read32(bus, dev, func,
cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET);
iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF;
@@ -108,7 +107,7 @@ int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func);
iommu->cap_offset = cap_ptr;
- cap_header = read_pci_config(bus, dev, func, cap_ptr);
+ cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
iommu->revision = get_field_from_reg_u32(
cap_header, PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT);
iommu->iotlb_support = get_field_from_reg_u32(
@@ -118,7 +117,7 @@ int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
iommu->pte_not_present_cached = get_field_from_reg_u32(
cap_header, PCI_CAP_NP_CACHE_MASK, PCI_CAP_NP_CACHE_SHIFT);
- cap_range = read_pci_config(bus, dev, func,
+ cap_range = pci_conf_read32(bus, dev, func,
cap_ptr + PCI_CAP_RANGE_OFFSET);
iommu->unit_id = get_field_from_reg_u32(
cap_range, PCI_CAP_UNIT_ID_MASK, PCI_CAP_UNIT_ID_SHIFT);
@@ -129,7 +128,7 @@ int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
iommu->last_devfn = get_field_from_reg_u32(
cap_range, PCI_CAP_LAST_DEVICE_MASK, PCI_CAP_LAST_DEVICE_SHIFT);
- misc_info = read_pci_config(bus, dev, func,
+ misc_info = pci_conf_read32(bus, dev, func,
cap_ptr + PCI_MISC_INFO_OFFSET);
iommu->msi_number = get_field_from_reg_u32(
misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT);
@@ -146,14 +145,13 @@ static int __init scan_caps_for_iommu(
int count, error = 0;
count = 0;
- cap_ptr = read_pci_config_byte(bus, dev, func,
- PCI_CAPABILITY_LIST);
+ cap_ptr = pci_conf_read8(bus, dev, func, PCI_CAPABILITY_LIST);
while ( (cap_ptr >= PCI_MIN_CAP_OFFSET) &&
(count < PCI_MAX_CAP_BLOCKS) &&
!error )
{
cap_ptr &= PCI_CAP_PTR_MASK;
- cap_header = read_pci_config(bus, dev, func, cap_ptr);
+ cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
cap_id = get_field_from_reg_u32(
cap_header, PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT);
@@ -182,12 +180,11 @@ static int __init scan_functions_for_iommu(
func = 0;
count = 1;
- while ( VALID_PCI_VENDOR_ID(read_pci_config_16(bus, dev, func,
- PCI_VENDOR_ID)) &&
+ while ( VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
+ PCI_VENDOR_ID)) &&
!error && (func < count) )
{
- hdr_type = read_pci_config_byte(bus, dev, func,
- PCI_HEADER_TYPE);
+ hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
if ( func == 0 && IS_PCI_MULTI_FUNCTION(hdr_type) )
count = PCI_MAX_FUNC_COUNT;
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index bb01d7915a..f36c0e86c4 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -20,10 +20,10 @@
#include <xen/config.h>
#include <xen/errno.h>
+#include <xen/pci.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include <asm-x86/fixmap.h>
-#include "../pci-direct.h"
#include "../pci_regs.h"
extern int nr_amd_iommus;
diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index ad5a8517bd..aa584e615f 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -388,17 +388,17 @@ int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
unsigned long flags;
u64 maddr;
struct hvm_iommu *hd = domain_hvm_iommu(d);
- int iw, ir;
+ int iw = IOMMU_IO_WRITE_ENABLED;
+ int ir = IOMMU_IO_READ_ENABLED;
BUG_ON( !hd->root_table );
- maddr = (u64)mfn << PAGE_SHIFT;
-
- iw = IOMMU_IO_WRITE_ENABLED;
- ir = IOMMU_IO_READ_ENABLED;
-
spin_lock_irqsave(&hd->mapping_lock, flags);
+ if ( is_hvm_domain(d) && !hd->p2m_synchronized )
+ goto out;
+
+ maddr = (u64)mfn << PAGE_SHIFT;
pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
if ( pte == NULL )
{
@@ -409,7 +409,7 @@ int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
}
set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
-
+out:
spin_unlock_irqrestore(&hd->mapping_lock, flags);
return 0;
}
@@ -425,11 +425,17 @@ int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
BUG_ON( !hd->root_table );
+ spin_lock_irqsave(&hd->mapping_lock, flags);
+
+ if ( is_hvm_domain(d) && !hd->p2m_synchronized )
+ {
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return 0;
+ }
+
requestor_id = hd->domain_id;
io_addr = (u64)gfn << PAGE_SHIFT;
- spin_lock_irqsave(&hd->mapping_lock, flags);
-
pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
if ( pte == NULL )
{
@@ -486,3 +492,53 @@ int amd_iommu_reserve_domain_unity_map(
spin_unlock_irqrestore(&hd->mapping_lock, flags);
return 0;
}
+
+int amd_iommu_sync_p2m(struct domain *d)
+{
+ unsigned long mfn, gfn, flags;
+ void *pte;
+ u64 maddr;
+ struct list_head *entry;
+ struct page_info *page;
+ struct hvm_iommu *hd;
+ int iw = IOMMU_IO_WRITE_ENABLED;
+ int ir = IOMMU_IO_READ_ENABLED;
+
+ if ( !is_hvm_domain(d) )
+ return 0;
+
+ hd = domain_hvm_iommu(d);
+
+ spin_lock_irqsave(&hd->mapping_lock, flags);
+
+ if ( hd->p2m_synchronized )
+ goto out;
+
+ for ( entry = d->page_list.next; entry != &d->page_list;
+ entry = entry->next )
+ {
+ page = list_entry(entry, struct page_info, list);
+ mfn = page_to_mfn(page);
+ gfn = get_gpfn_from_mfn(mfn);
+
+ if ( gfn == INVALID_M2P_ENTRY )
+ continue;
+
+ maddr = (u64)mfn << PAGE_SHIFT;
+ pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
+ if ( pte == NULL )
+ {
+ dprintk(XENLOG_ERR,
+ "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return -EFAULT;
+ }
+ set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
+ }
+
+ hd->p2m_synchronized = 1;
+
+out:
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ return 0;
+}
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 8b93e9c3cd..6a6b5484b3 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -18,12 +18,12 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <xen/sched.h>
+#include <xen/pci.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include <asm/hvm/svm/amd-iommu-acpi.h>
-#include <xen/sched.h>
#include <asm/mm.h>
-#include "../pci-direct.h"
#include "../pci_regs.h"
struct list_head amd_iommu_head;
@@ -316,7 +316,7 @@ void __init amd_iommu_setup_dom0_devices(void)
{
for ( func = 0; func < 8; func++ )
{
- l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
+ l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
/* some broken boards return 0 or ~0 if a slot is empty: */
if ( (l == 0xffffffff) || (l == 0x00000000) ||
(l == 0x0000ffff) || (l == 0xffff0000) )
@@ -553,8 +553,9 @@ static int reassign_device( struct domain *source, struct domain *target,
int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
{
int bdf = (bus << 8) | devfn;
- int req_id;
- req_id = ivrs_mappings[bdf].dte_requestor_id;
+ int req_id = ivrs_mappings[bdf].dte_requestor_id;
+
+ amd_iommu_sync_p2m(d);
if ( ivrs_mappings[req_id].unity_map_enable )
{
diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index a364627d4e..528839550b 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -25,7 +25,7 @@ static void pt_irq_time_out(void *data)
{
struct hvm_mirq_dpci_mapping *irq_map = data;
unsigned int guest_gsi, machine_gsi = 0;
- struct hvm_irq_dpci *dpci = irq_map->dom->arch.hvm_domain.irq.dpci;
+ struct hvm_irq_dpci *dpci = domain_get_irq_dpci(irq_map->dom);
struct dev_intx_gsi_link *digl;
uint32_t device, intx;
@@ -49,7 +49,7 @@ static void pt_irq_time_out(void *data)
int pt_irq_create_bind_vtd(
struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
{
- struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+ struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
uint32_t machine_gsi, guest_gsi;
uint32_t device, intx, link;
struct dev_intx_gsi_link *digl;
@@ -65,11 +65,8 @@ int pt_irq_create_bind_vtd(
for ( int i = 0; i < NR_IRQS; i++ )
INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
- if ( cmpxchg((unsigned long *)&d->arch.hvm_domain.irq.dpci,
- 0, (unsigned long)hvm_irq_dpci) != 0 )
+ if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
xfree(hvm_irq_dpci);
-
- hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
}
machine_gsi = pt_irq_bind->machine_irq;
@@ -116,7 +113,7 @@ int pt_irq_create_bind_vtd(
int pt_irq_destroy_bind_vtd(
struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
{
- struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+ struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
uint32_t machine_gsi, guest_gsi;
uint32_t device, intx, link;
struct list_head *digl_list, *tmp;
@@ -133,14 +130,15 @@ int pt_irq_destroy_bind_vtd(
hvm_irq_dpci->link_cnt[link]--;
gdprintk(XENLOG_INFO,
- "pt_irq_destroy_bind_vtd: machine_gsi=%d, guest_gsi=%d, device=%d, intx=%d.\n",
- machine_gsi, guest_gsi, device, intx);
- memset(&hvm_irq_dpci->girq[guest_gsi], 0, sizeof(struct hvm_girq_dpci_mapping));
+ "pt_irq_destroy_bind_vtd: machine_gsi=%d "
+ "guest_gsi=%d, device=%d, intx=%d.\n",
+ machine_gsi, guest_gsi, device, intx);
+ memset(&hvm_irq_dpci->girq[guest_gsi], 0,
+ sizeof(struct hvm_girq_dpci_mapping));
/* clear the mirq info */
if ( hvm_irq_dpci->mirq[machine_gsi].valid )
{
-
list_for_each_safe ( digl_list, tmp,
&hvm_irq_dpci->mirq[machine_gsi].digl_list )
{
@@ -174,10 +172,10 @@ int pt_irq_destroy_bind_vtd(
int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
{
- struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+ struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
- if ( !iommu_enabled || (d == dom0) || (hvm_irq->dpci == NULL) ||
- !hvm_irq->dpci->mirq[mirq].valid )
+ if ( !iommu_enabled || (d == dom0) || !dpci ||
+ !dpci->mirq[mirq].valid )
return 0;
/*
@@ -186,58 +184,18 @@ int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
* this case the guest may not pick up the interrupt (e.g., masked at the
* PIC) and we need to detect that.
*/
- set_bit(mirq, hvm_irq->dpci->dirq_mask);
- set_timer(&hvm_irq->dpci->hvm_timer[irq_to_vector(mirq)],
+ set_bit(mirq, dpci->dirq_mask);
+ set_timer(&dpci->hvm_timer[irq_to_vector(mirq)],
NOW() + PT_IRQ_TIME_OUT);
vcpu_kick(d->vcpu[0]);
return 1;
}
-static void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
-{
- struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
- struct hvm_irq_dpci *dpci = hvm_irq->dpci;
- struct dev_intx_gsi_link *digl, *tmp;
- int i;
-
- ASSERT(isairq < NR_ISAIRQS);
- if ( !iommu_enabled || !dpci ||
- !test_bit(isairq, dpci->isairq_map) )
- return;
-
- /* Multiple mirq may be mapped to one isa irq */
- for ( i = 0; i < NR_IRQS; i++ )
- {
- if ( !dpci->mirq[i].valid )
- continue;
-
- list_for_each_entry_safe ( digl, tmp,
- &dpci->mirq[i].digl_list, list )
- {
- if ( hvm_irq->pci_link.route[digl->link] == isairq )
- {
- hvm_pci_intx_deassert(d, digl->device, digl->intx);
- spin_lock(&dpci->dirq_lock);
- if ( --dpci->mirq[i].pending == 0 )
- {
- spin_unlock(&dpci->dirq_lock);
- gdprintk(XENLOG_INFO VTDPREFIX,
- "hvm_dpci_isairq_eoi:: mirq = %x\n", i);
- stop_timer(&dpci->hvm_timer[irq_to_vector(i)]);
- pirq_guest_eoi(d, i);
- }
- else
- spin_unlock(&dpci->dirq_lock);
- }
- }
- }
-}
-
void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
union vioapic_redir_entry *ent)
{
- struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+ struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
uint32_t device, intx, machine_gsi;
if ( !iommu_enabled || (hvm_irq_dpci == NULL) ||
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index b0f26944de..2cf5551337 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -58,7 +58,7 @@ int assign_device(struct domain *d, u8 bus, u8 devfn)
void iommu_domain_destroy(struct domain *d)
{
- struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+ struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
uint32_t i;
struct hvm_iommu *hd = domain_hvm_iommu(d);
struct list_head *ioport_list, *digl_list, *tmp;
diff --git a/xen/drivers/passthrough/pci-direct.h b/xen/drivers/passthrough/pci-direct.h
deleted file mode 100644
index 920343851b..0000000000
--- a/xen/drivers/passthrough/pci-direct.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef ASM_PCI_DIRECT_H
-#define ASM_PCI_DIRECT_H 1
-
-#include <xen/types.h>
-#include <asm/io.h>
-
-/* Direct PCI access. This is used for PCI accesses in early boot before
- the PCI subsystem works. */
-
-#define PDprintk(x...)
-
-static inline u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
-{
- u32 v;
- outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
- v = inl(0xcfc);
- if (v != 0xffffffff)
- PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
- return v;
-}
-
-static inline u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
-{
- u8 v;
- outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
- v = inb(0xcfc + (offset&3));
- PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
- return v;
-}
-
-static inline u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
-{
- u16 v;
- outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
- v = inw(0xcfc + (offset&2));
- PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
- return v;
-}
-
-static inline void write_pci_config(
- u8 bus, u8 slot, u8 func, u8 offset, u32 val)
-{
- PDprintk("%x writing to %x: %x\n", slot, offset, val);
- outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
- outl(val, 0xcfc);
-}
-
-#endif
diff --git a/xen/drivers/passthrough/vtd/Makefile b/xen/drivers/passthrough/vtd/Makefile
index 53b18b8f3f..06ee624ae7 100644
--- a/xen/drivers/passthrough/vtd/Makefile
+++ b/xen/drivers/passthrough/vtd/Makefile
@@ -1,3 +1,5 @@
+subdir-$(x86) += x86
+
obj-y += iommu.o
obj-y += dmar.o
obj-y += utils.o
diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 915e1d04ef..9498914044 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -25,9 +25,9 @@
#include <xen/acpi.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
+#include <xen/pci.h>
#include <asm/string.h>
#include "dmar.h"
-#include "../pci-direct.h"
#include "../pci_regs.h"
int vtd_enabled;
@@ -211,7 +211,7 @@ struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
static int scope_device_count(void *start, void *end)
{
struct acpi_dev_scope *scope;
- u8 bus, sub_bus, sec_bus;
+ u16 bus, sub_bus, sec_bus;
struct acpi_pci_path *path;
int depth, count = 0;
u8 dev, func;
@@ -231,9 +231,9 @@ static int scope_device_count(void *start, void *end)
bus = scope->start_bus;
depth = (scope->length - sizeof(struct acpi_dev_scope))
/ sizeof(struct acpi_pci_path);
- while ( --depth )
+ while ( --depth > 0 )
{
- bus = read_pci_config_byte(
+ bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SECONDARY_BUS);
path++;
}
@@ -250,9 +250,9 @@ static int scope_device_count(void *start, void *end)
dprintk(XENLOG_INFO VTDPREFIX,
"found bridge: bdf = %x:%x:%x\n",
bus, path->dev, path->fn);
- sec_bus = read_pci_config_byte(
+ sec_bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SECONDARY_BUS);
- sub_bus = read_pci_config_byte(
+ sub_bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
while ( sec_bus <= sub_bus )
@@ -261,7 +261,7 @@ static int scope_device_count(void *start, void *end)
{
for ( func = 0; func < 8; func++ )
{
- l = read_pci_config(
+ l = pci_conf_read32(
sec_bus, dev, func, PCI_VENDOR_ID);
/* some broken boards return 0 or
@@ -301,7 +301,7 @@ static int __init acpi_parse_dev_scope(
void *start, void *end, void *acpi_entry, int type)
{
struct acpi_dev_scope *scope;
- u8 bus, sub_bus, sec_bus;
+ u16 bus, sub_bus, sec_bus;
struct acpi_pci_path *path;
struct acpi_ioapic_unit *acpi_ioapic_unit = NULL;
int depth;
@@ -353,9 +353,9 @@ static int __init acpi_parse_dev_scope(
/ sizeof(struct acpi_pci_path);
bus = scope->start_bus;
- while ( --depth )
+ while ( --depth > 0 )
{
- bus = read_pci_config_byte(
+ bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SECONDARY_BUS);
path++;
}
@@ -374,9 +374,9 @@ static int __init acpi_parse_dev_scope(
dprintk(XENLOG_INFO VTDPREFIX,
"found bridge: bus = %x dev = %x func = %x\n",
bus, path->dev, path->fn);
- sec_bus = read_pci_config_byte(
+ sec_bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SECONDARY_BUS);
- sub_bus = read_pci_config_byte(
+ sub_bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SUBORDINATE_BUS);
while ( sec_bus <= sub_bus )
@@ -385,7 +385,7 @@ static int __init acpi_parse_dev_scope(
{
for ( func = 0; func < 8; func++ )
{
- l = read_pci_config(
+ l = pci_conf_read32(
sec_bus, dev, func, PCI_VENDOR_ID);
/* some broken boards return 0 or
diff --git a/xen/drivers/passthrough/vtd/dmar.h b/xen/drivers/passthrough/vtd/dmar.h
index 040a626e03..5a6ac2e7b6 100644
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -92,14 +92,10 @@ struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
#define RMRR_TYPE 2
#define ATSR_TYPE 3
-#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
-#define time_after(a,b) \
- (typecheck(unsigned long, a) && \
- typecheck(unsigned long, b) && \
- ((long)(b) - (long)(a) < 0))
+#define DMAR_OPERATION_TIMEOUT MILLISECS(1000)
int vtd_hw_check(void);
void disable_pmr(struct iommu *iommu);
int is_usb_device(struct pci_dev *pdev);
-#endif // _DMAR_H_
+#endif /* _DMAR_H_ */
diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
index 301a8f623b..040d7ea5d4 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -21,10 +21,11 @@
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/iommu.h>
+#include <xen/time.h>
+#include <xen/pci.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
-#include "../pci-direct.h"
#include "../pci_regs.h"
#include "msi.h"
#include "extern.h"
@@ -45,7 +46,7 @@ u16 apicid_to_bdf(int apic_id)
static void remap_entry_to_ioapic_rte(
struct iommu *iommu, struct IO_APIC_route_entry *old_rte)
{
- struct iremap_entry *iremap_entry = NULL;
+ struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct IO_APIC_route_remap_entry *remap_rte;
unsigned int index;
unsigned long flags;
@@ -70,7 +71,9 @@ static void remap_entry_to_ioapic_rte(
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- iremap_entry = &ir_ctrl->iremap[index];
+ iremap_entries =
+ (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+ iremap_entry = &iremap_entries[index];
old_rte->vector = iremap_entry->lo.vector;
old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -80,13 +83,14 @@ static void remap_entry_to_ioapic_rte(
old_rte->dest.logical.__reserved_1 = 0;
old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
+ unmap_vtd_domain_page(iremap_entries);
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
}
static void ioapic_rte_to_remap_entry(struct iommu *iommu,
int apic_id, struct IO_APIC_route_entry *old_rte)
{
- struct iremap_entry *iremap_entry = NULL;
+ struct iremap_entry *iremap_entry = NULL, *iremap_entries;
struct IO_APIC_route_remap_entry *remap_rte;
unsigned int index;
unsigned long flags;
@@ -103,7 +107,10 @@ static void ioapic_rte_to_remap_entry(struct iommu *iommu,
goto out;
}
- iremap_entry = &(ir_ctrl->iremap[index]);
+ iremap_entries =
+ (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
+ iremap_entry = &iremap_entries[index];
+
if ( *(u64 *)iremap_entry != 0 )
dprintk(XENLOG_WARNING VTDPREFIX,
"Interrupt remapping entry is in use already!\n");
@@ -124,12 +131,13 @@ static void ioapic_rte_to_remap_entry(struct iommu *iommu,
iremap_entry->lo.p = 1; /* finally, set present bit */
ir_ctrl->iremap_index++;
+ unmap_vtd_domain_page(iremap_entries);
iommu_flush_iec_index(iommu, 0, index);
ret = invalidate_sync(iommu);
- /* now construct new ioapic rte entry */
+ /* now construct new ioapic rte entry */
remap_rte->vector = old_rte->vector;
- remap_rte->delivery_mode = 0; /* has to be 0 for remap format */
+ remap_rte->delivery_mode = 0; /* has to be 0 for remap format */
remap_rte->index_15 = index & 0x8000;
remap_rte->index_0_14 = index & 0x7fff;
remap_rte->delivery_status = old_rte->delivery_status;
@@ -154,7 +162,7 @@ io_apic_read_remap_rte(
struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
+ if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
{
*IO_APIC_BASE(apic) = reg;
return *(IO_APIC_BASE(apic)+4);
@@ -200,7 +208,7 @@ io_apic_write_remap_rte(
struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
- if ( !iommu || !ir_ctrl || !(ir_ctrl->iremap) )
+ if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
{
*IO_APIC_BASE(apic) = reg;
*(IO_APIC_BASE(apic)+4) = value;
@@ -237,43 +245,41 @@ io_apic_write_remap_rte(
int intremap_setup(struct iommu *iommu)
{
struct ir_ctrl *ir_ctrl;
- unsigned long start_time;
- u64 paddr;
+ s_time_t start_time;
if ( !ecap_intr_remap(iommu->ecap) )
return -ENODEV;
ir_ctrl = iommu_ir_ctrl(iommu);
- if ( ir_ctrl->iremap == NULL )
+ if ( ir_ctrl->iremap_maddr == 0 )
{
- ir_ctrl->iremap = alloc_xenheap_page();
- if ( ir_ctrl->iremap == NULL )
+ ir_ctrl->iremap_maddr = alloc_pgtable_maddr();
+ if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
- "Cannot allocate memory for ir_ctrl->iremap\n");
+ "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
return -ENODEV;
}
- memset(ir_ctrl->iremap, 0, PAGE_SIZE);
}
- paddr = virt_to_maddr(ir_ctrl->iremap);
#if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
/* set extended interrupt mode bit */
- paddr |= ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
+ ir_ctrl->iremap_maddr |=
+ ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
#endif
/* size field = 256 entries per 4K page = 8 - 1 */
- paddr |= 7;
- dmar_writeq(iommu->reg, DMAR_IRTA_REG, paddr);
+ ir_ctrl->iremap_maddr |= 7;
+ dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
/* set SIRTP */
iommu->gcmd |= DMA_GCMD_SIRTP;
dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
/* Make sure hardware complete it */
- start_time = jiffies;
+ start_time = NOW();
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_SIRTPS) )
{
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
{
dprintk(XENLOG_ERR VTDPREFIX,
"Cannot set SIRTP field for interrupt remapping\n");
@@ -286,10 +292,10 @@ int intremap_setup(struct iommu *iommu)
iommu->gcmd |= DMA_GCMD_CFI;
dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
- start_time = jiffies;
+ start_time = NOW();
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_CFIS) )
{
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
{
dprintk(XENLOG_ERR VTDPREFIX,
"Cannot set CFI field for interrupt remapping\n");
@@ -302,10 +308,10 @@ int intremap_setup(struct iommu *iommu)
iommu->gcmd |= DMA_GCMD_IRE;
dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
- start_time = jiffies;
+ start_time = NOW();
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES) )
{
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
{
dprintk(XENLOG_ERR VTDPREFIX,
"Cannot set IRE field for interrupt remapping\n");
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 968e25d7f5..59babdfd66 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -24,12 +24,16 @@
#include <xen/xmalloc.h>
#include <xen/domain_page.h>
#include <xen/iommu.h>
+#include <xen/numa.h>
+#include <xen/time.h>
+#include <xen/pci.h>
+#include <asm/paging.h>
#include "iommu.h"
#include "dmar.h"
-#include "../pci-direct.h"
#include "../pci_regs.h"
#include "msi.h"
#include "extern.h"
+#include "vtd.h"
#define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
@@ -156,11 +160,11 @@ struct iommu_flush *iommu_get_flush(struct iommu *iommu)
return &(iommu->intel->flush);
}
-unsigned int x86_clflush_size;
+unsigned int clflush_size;
void clflush_cache_range(void *adr, int size)
{
int i;
- for ( i = 0; i < size; i += x86_clflush_size )
+ for ( i = 0; i < size; i += clflush_size )
clflush(adr + i);
}
@@ -170,78 +174,82 @@ static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
clflush_cache_range(addr, size);
}
-#define iommu_flush_cache_entry(iommu, addr) \
- __iommu_flush_cache(iommu, addr, 8)
-#define iommu_flush_cache_page(iommu, addr) \
- __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K)
+void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
+{
+ __iommu_flush_cache(iommu, addr, 8);
+}
+
+void iommu_flush_cache_page(struct iommu *iommu, void *addr)
+{
+ __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
+}
int nr_iommus;
/* context entry handling */
-static struct context_entry * device_to_context_entry(struct iommu *iommu,
- u8 bus, u8 devfn)
+static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
{
- struct root_entry *root;
- struct context_entry *context;
- unsigned long phy_addr;
+ struct root_entry *root, *root_entries;
unsigned long flags;
+ u64 maddr;
spin_lock_irqsave(&iommu->lock, flags);
- root = &iommu->root_entry[bus];
+ root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+ root = &root_entries[bus];
if ( !root_present(*root) )
{
- phy_addr = (unsigned long) alloc_xenheap_page();
- if ( !phy_addr )
+ maddr = alloc_pgtable_maddr();
+ if ( maddr == 0 )
{
spin_unlock_irqrestore(&iommu->lock, flags);
- return NULL;
+ return 0;
}
- memset((void *) phy_addr, 0, PAGE_SIZE);
- iommu_flush_cache_page(iommu, (void *)phy_addr);
- phy_addr = virt_to_maddr((void *)phy_addr);
- set_root_value(*root, phy_addr);
+ set_root_value(*root, maddr);
set_root_present(*root);
iommu_flush_cache_entry(iommu, root);
}
- phy_addr = (unsigned long) get_context_addr(*root);
- context = (struct context_entry *)maddr_to_virt(phy_addr);
+ maddr = (u64) get_context_addr(*root);
+ unmap_vtd_domain_page(root_entries);
spin_unlock_irqrestore(&iommu->lock, flags);
- return &context[devfn];
+ return maddr;
}
static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
{
- struct root_entry *root;
+ struct root_entry *root, *root_entries;
struct context_entry *context;
- unsigned long phy_addr;
+ u64 context_maddr;
int ret;
unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
- root = &iommu->root_entry[bus];
+ root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+ root = &root_entries[bus];
if ( !root_present(*root) )
{
ret = 0;
goto out;
}
- phy_addr = get_context_addr(*root);
- context = (struct context_entry *)maddr_to_virt(phy_addr);
+ context_maddr = get_context_addr(*root);
+ context = (struct context_entry *)map_vtd_domain_page(context_maddr);
ret = context_present(context[devfn]);
+ unmap_vtd_domain_page(context);
out:
+ unmap_vtd_domain_page(root_entries);
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
-static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
+static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int addr_width = agaw_to_width(hd->agaw);
- struct dma_pte *parent, *pte = NULL, *pgd;
+ struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(hd->agaw);
int offset;
unsigned long flags;
- struct page_info *pg = NULL;
+ u64 pte_maddr = 0;
u64 *vaddr = NULL;
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -249,19 +257,14 @@ static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
addr &= (((u64)1) << addr_width) - 1;
spin_lock_irqsave(&hd->mapping_lock, flags);
- if ( !hd->pgd )
+ if ( hd->pgd_maddr == 0 )
{
- pgd = (struct dma_pte *)alloc_xenheap_page();
- if ( !pgd )
- {
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
- return NULL;
- }
- memset(pgd, 0, PAGE_SIZE);
- hd->pgd = pgd;
+ hd->pgd_maddr = alloc_pgtable_maddr();
+        if ( hd->pgd_maddr == 0 )
+        {
+            spin_unlock_irqrestore(&hd->mapping_lock, flags);
+            return 0;
+        }
}
- parent = hd->pgd;
+ parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
while ( level > 1 )
{
offset = address_level_offset(addr, level);
@@ -269,17 +272,15 @@ static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
if ( dma_pte_addr(*pte) == 0 )
{
- pg = alloc_domheap_page(NULL);
- vaddr = map_domain_page(page_to_mfn(pg));
+ u64 maddr = alloc_pgtable_maddr();
+ dma_set_pte_addr(*pte, maddr);
+ vaddr = map_vtd_domain_page(maddr);
if ( !vaddr )
{
+ unmap_vtd_domain_page(parent);
spin_unlock_irqrestore(&hd->mapping_lock, flags);
- return NULL;
+ return 0;
}
- memset(vaddr, 0, PAGE_SIZE);
- iommu_flush_cache_page(iommu, vaddr);
-
- dma_set_pte_addr(*pte, page_to_maddr(pg));
/*
* high level table always sets r/w, last level
@@ -291,21 +292,20 @@ static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
}
else
{
- pg = maddr_to_page(pte->val);
- vaddr = map_domain_page(page_to_mfn(pg));
+ vaddr = map_vtd_domain_page(pte->val);
if ( !vaddr )
{
+ unmap_vtd_domain_page(parent);
spin_unlock_irqrestore(&hd->mapping_lock, flags);
- return NULL;
+ return 0;
}
}
- if ( parent != hd->pgd )
- unmap_domain_page(parent);
-
- if ( level == 2 && vaddr )
+ unmap_vtd_domain_page(parent);
+ if ( level == 2 )
{
- unmap_domain_page(vaddr);
+ pte_maddr = pte->val & PAGE_MASK_4K;
+ unmap_vtd_domain_page(vaddr);
break;
}
@@ -315,50 +315,49 @@ static struct page_info *addr_to_dma_page(struct domain *domain, u64 addr)
}
spin_unlock_irqrestore(&hd->mapping_lock, flags);
- return pg;
+ return pte_maddr;
}
/* return address's page at specific level */
-static struct page_info *dma_addr_level_page(struct domain *domain,
- u64 addr, int level)
+static u64 dma_addr_level_page_maddr(
+ struct domain *domain, u64 addr, int level)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct dma_pte *parent, *pte = NULL;
int total = agaw_to_level(hd->agaw);
int offset;
- struct page_info *pg = NULL;
+ u64 pg_maddr = hd->pgd_maddr;
- parent = hd->pgd;
+ if ( pg_maddr == 0 )
+ return 0;
+
+ parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
while ( level <= total )
{
offset = address_level_offset(addr, total);
pte = &parent[offset];
if ( dma_pte_addr(*pte) == 0 )
- {
- if ( parent != hd->pgd )
- unmap_domain_page(parent);
break;
- }
- pg = maddr_to_page(pte->val);
- if ( parent != hd->pgd )
- unmap_domain_page(parent);
+ pg_maddr = pte->val & PAGE_MASK_4K;
+ unmap_vtd_domain_page(parent);
if ( level == total )
- return pg;
+ return pg_maddr;
- parent = map_domain_page(page_to_mfn(pg));
+        parent = map_vtd_domain_page(pg_maddr); /* pte's page was unmapped above */
total--;
}
- return NULL;
+ unmap_vtd_domain_page(parent);
+ return 0;
}
static void iommu_flush_write_buffer(struct iommu *iommu)
{
u32 val;
unsigned long flag;
- unsigned long start_time;
+ s_time_t start_time;
if ( !cap_rwbf(iommu->cap) )
return;
@@ -368,13 +367,13 @@ static void iommu_flush_write_buffer(struct iommu *iommu)
dmar_writel(iommu->reg, DMAR_GCMD_REG, val);
/* Make sure hardware complete it */
- start_time = jiffies;
+ start_time = NOW();
for ( ; ; )
{
val = dmar_readl(iommu->reg, DMAR_GSTS_REG);
if ( !(val & DMA_GSTS_WBFS) )
break;
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
panic("DMAR hardware is malfunctional,"
" please disable IOMMU\n");
cpu_relax();
@@ -391,7 +390,7 @@ static int flush_context_reg(
struct iommu *iommu = (struct iommu *) _iommu;
u64 val = 0;
unsigned long flag;
- unsigned long start_time;
+ s_time_t start_time;
/*
* In the non-present entry flush case, if hardware doesn't cache
@@ -429,13 +428,13 @@ static int flush_context_reg(
dmar_writeq(iommu->reg, DMAR_CCMD_REG, val);
/* Make sure hardware complete it */
- start_time = jiffies;
+ start_time = NOW();
for ( ; ; )
{
val = dmar_readq(iommu->reg, DMAR_CCMD_REG);
if ( !(val & DMA_CCMD_ICC) )
break;
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
panic("DMAR hardware is malfunctional, please disable IOMMU\n");
cpu_relax();
}
@@ -479,7 +478,7 @@ static int flush_iotlb_reg(void *_iommu, u16 did,
int tlb_offset = ecap_iotlb_offset(iommu->ecap);
u64 val = 0, val_iva = 0;
unsigned long flag;
- unsigned long start_time;
+ s_time_t start_time;
/*
* In the non-present entry flush case, if hardware doesn't cache
@@ -526,13 +525,13 @@ static int flush_iotlb_reg(void *_iommu, u16 did,
dmar_writeq(iommu->reg, tlb_offset + 8, val);
/* Make sure hardware complete it */
- start_time = jiffies;
+ start_time = NOW();
for ( ; ; )
{
val = dmar_readq(iommu->reg, tlb_offset + 8);
if ( !(val & DMA_TLB_IVT) )
break;
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
panic("DMAR hardware is malfunctional, please disable IOMMU\n");
cpu_relax();
}
@@ -630,17 +629,17 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
- struct dma_pte *pte = NULL;
- struct page_info *pg = NULL;
+ struct dma_pte *page = NULL, *pte = NULL;
+ u64 pg_maddr;
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
/* get last level pte */
- pg = dma_addr_level_page(domain, addr, 1);
- if ( !pg )
+ pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
+ if ( pg_maddr == 0 )
return;
- pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
- pte += address_level_offset(addr, 1);
+ page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+ pte = page + address_level_offset(addr, 1);
if ( pte )
{
dma_clear_pte(*pte);
@@ -656,7 +655,7 @@ static void dma_pte_clear_one(struct domain *domain, u64 addr)
iommu_flush_write_buffer(iommu);
}
}
- unmap_domain_page(pte);
+ unmap_vtd_domain_page(page);
}
/* clear last level pte, a tlb flush should be followed */
@@ -686,11 +685,11 @@ void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct iommu *iommu;
int addr_width = agaw_to_width(hd->agaw);
- struct dma_pte *pte;
+ struct dma_pte *page, *pte;
int total = agaw_to_level(hd->agaw);
int level;
u32 tmp;
- struct page_info *pg = NULL;
+ u64 pg_maddr;
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
@@ -708,15 +707,15 @@ void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
while ( tmp < end )
{
- pg = dma_addr_level_page(domain, tmp, level);
- if ( !pg )
+ pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
+ if ( pg_maddr == 0 )
return;
- pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
- pte += address_level_offset(tmp, level);
+ page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+ pte = page + address_level_offset(tmp, level);
dma_clear_pte(*pte);
iommu_flush_cache_entry(iommu, pte);
- unmap_domain_page(pte);
- free_domheap_page(pg);
+ unmap_vtd_domain_page(page);
+ free_pgtable_maddr(pg_maddr);
tmp += level_size(level);
}
@@ -726,17 +725,15 @@ void dma_pte_free_pagetable(struct domain *domain, u64 start, u64 end)
/* free pgd */
if ( start == 0 && end == ((((u64)1) << addr_width) - 1) )
{
- free_xenheap_page((void *)hd->pgd);
- hd->pgd = NULL;
+ free_pgtable_maddr(hd->pgd_maddr);
+ hd->pgd_maddr = 0;
}
}
/* iommu handling */
static int iommu_set_root_entry(struct iommu *iommu)
{
- void *addr;
u32 cmd, sts;
- struct root_entry *root;
unsigned long flags;
if ( iommu == NULL )
@@ -746,25 +743,19 @@ static int iommu_set_root_entry(struct iommu *iommu)
return -EINVAL;
}
- if ( unlikely(!iommu->root_entry) )
+ if ( iommu->root_maddr != 0 )
{
- root = (struct root_entry *)alloc_xenheap_page();
- if ( root == NULL )
- return -ENOMEM;
-
- memset((u8*)root, 0, PAGE_SIZE);
- iommu_flush_cache_page(iommu, root);
-
- if ( cmpxchg((unsigned long *)&iommu->root_entry,
- 0, (unsigned long)root) != 0 )
- free_xenheap_page((void *)root);
+ free_pgtable_maddr(iommu->root_maddr);
+ iommu->root_maddr = 0;
}
- addr = iommu->root_entry;
-
spin_lock_irqsave(&iommu->register_lock, flags);
- dmar_writeq(iommu->reg, DMAR_RTADDR_REG, virt_to_maddr(addr));
+    iommu->root_maddr = alloc_pgtable_maddr();
+    if ( iommu->root_maddr == 0 )
+    {
+        spin_unlock_irqrestore(&iommu->register_lock, flags);
+        return -ENOMEM;
+    }
+
+ dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
cmd = iommu->gcmd | DMA_GCMD_SRTP;
dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
@@ -1101,8 +1092,11 @@ static void free_iommu(struct iommu *iommu)
{
if ( !iommu )
return;
- if ( iommu->root_entry )
- free_xenheap_page((void *)iommu->root_entry);
+ if ( iommu->root_maddr != 0 )
+ {
+ free_pgtable_maddr(iommu->root_maddr);
+ iommu->root_maddr = 0;
+ }
if ( iommu->reg )
iounmap(iommu->reg);
free_intel_iommu(iommu->intel);
@@ -1157,13 +1151,17 @@ static int domain_context_mapping_one(
u8 bus, u8 devfn)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
- struct context_entry *context;
+ struct context_entry *context, *context_entries;
unsigned long flags;
int ret = 0;
+ u64 maddr;
- context = device_to_context_entry(iommu, bus, devfn);
+ maddr = bus_to_context_maddr(iommu, bus);
+ context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
+ context = &context_entries[devfn];
if ( !context )
{
+ unmap_vtd_domain_page(context_entries);
gdprintk(XENLOG_ERR VTDPREFIX,
"domain_context_mapping_one:context == NULL:"
"bdf = %x:%x:%x\n",
@@ -1173,6 +1171,7 @@ static int domain_context_mapping_one(
if ( context_present(*context) )
{
+ unmap_vtd_domain_page(context_entries);
gdprintk(XENLOG_WARNING VTDPREFIX,
"domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1193,19 +1192,8 @@ static int domain_context_mapping_one(
else
{
#endif
- if ( !hd->pgd )
- {
- struct dma_pte *pgd = (struct dma_pte *)alloc_xenheap_page();
- if ( !pgd )
- {
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
- return -ENOMEM;
- }
- memset(pgd, 0, PAGE_SIZE);
- hd->pgd = pgd;
- }
-
- context_set_address_root(*context, virt_to_maddr(hd->pgd));
+ ASSERT(hd->pgd_maddr != 0);
+ context_set_address_root(*context, hd->pgd_maddr);
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
#ifdef CONTEXT_PASSTHRU
}
@@ -1217,9 +1205,11 @@ static int domain_context_mapping_one(
gdprintk(XENLOG_INFO VTDPREFIX,
"domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
- " hd->pgd=%p\n",
+ " hd->pgd_maddr=%"PRIx64"\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- context->hi, context->lo, hd->pgd);
+ context->hi, context->lo, hd->pgd_maddr);
+
+ unmap_vtd_domain_page(context_entries);
if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
(((u16)bus) << 8) | devfn,
@@ -1238,13 +1228,13 @@ static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
while ( ttl-- )
{
- pos = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
+ pos = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pos);
if ( pos < 0x40 )
break;
pos &= ~3;
- id = read_pci_config_byte(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- pos + PCI_CAP_LIST_ID);
+ id = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ pos + PCI_CAP_LIST_ID);
if ( id == 0xff )
break;
@@ -1268,13 +1258,13 @@ int pdev_type(struct pci_dev *dev)
u16 class_device;
u16 status;
- class_device = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
+ class_device = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), PCI_CLASS_DEVICE);
if ( class_device == PCI_CLASS_BRIDGE_PCI )
return DEV_TYPE_PCI_BRIDGE;
- status = read_pci_config_16(dev->bus, PCI_SLOT(dev->devfn),
- PCI_FUNC(dev->devfn), PCI_STATUS);
+ status = pci_conf_read16(dev->bus, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), PCI_STATUS);
if ( !(status & PCI_STATUS_CAP_LIST) )
return DEV_TYPE_PCI;
@@ -1302,7 +1292,7 @@ static int domain_context_mapping(
switch ( type )
{
case DEV_TYPE_PCI_BRIDGE:
- sec_bus = read_pci_config_byte(
+ sec_bus = pci_conf_read8(
pdev->bus, PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
@@ -1312,7 +1302,7 @@ static int domain_context_mapping(
bus2bridge[sec_bus].devfn = pdev->devfn;
}
- sub_bus = read_pci_config_byte(
+ sub_bus = pci_conf_read8(
pdev->bus, PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
@@ -1380,12 +1370,16 @@ static int domain_context_unmap_one(
struct iommu *iommu,
u8 bus, u8 devfn)
{
- struct context_entry *context;
+ struct context_entry *context, *context_entries;
unsigned long flags;
+ u64 maddr;
- context = device_to_context_entry(iommu, bus, devfn);
+ maddr = bus_to_context_maddr(iommu, bus);
+ context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
+ context = &context_entries[devfn];
if ( !context )
{
+ unmap_vtd_domain_page(context_entries);
gdprintk(XENLOG_ERR VTDPREFIX,
"domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1394,6 +1388,7 @@ static int domain_context_unmap_one(
if ( !context_present(*context) )
{
+ unmap_vtd_domain_page(context_entries);
gdprintk(XENLOG_WARNING VTDPREFIX,
"domain_context_unmap_one-%x:%x:%x- "
"context NOT present:return\n",
@@ -1411,6 +1406,7 @@ static int domain_context_unmap_one(
iommu_flush_cache_entry(iommu, context);
iommu_flush_context_global(iommu, 0);
iommu_flush_iotlb_global(iommu, 0);
+ unmap_vtd_domain_page(context_entries);
spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
@@ -1429,10 +1425,10 @@ static int domain_context_unmap(
switch ( type )
{
case DEV_TYPE_PCI_BRIDGE:
- sec_bus = read_pci_config_byte(
+ sec_bus = pci_conf_read8(
pdev->bus, PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn), PCI_SECONDARY_BUS);
- sub_bus = read_pci_config_byte(
+ sub_bus = pci_conf_read8(
pdev->bus, PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
@@ -1566,36 +1562,7 @@ void iommu_domain_teardown(struct domain *d)
return;
iommu_domid_release(d);
-
-#if CONFIG_PAGING_LEVELS == 3
- {
- struct hvm_iommu *hd = domain_hvm_iommu(d);
- int level = agaw_to_level(hd->agaw);
- struct dma_pte *pgd = NULL;
-
- switch ( level )
- {
- case VTD_PAGE_TABLE_LEVEL_3:
- if ( hd->pgd )
- free_xenheap_page((void *)hd->pgd);
- break;
- case VTD_PAGE_TABLE_LEVEL_4:
- if ( hd->pgd )
- {
- pgd = hd->pgd;
- if ( pgd[0].val != 0 )
- free_xenheap_page((void*)maddr_to_virt(
- dma_pte_addr(pgd[0])));
- free_xenheap_page((void *)hd->pgd);
- }
- break;
- default:
- gdprintk(XENLOG_ERR VTDPREFIX,
- "Unsupported p2m table sharing level!\n");
- break;
- }
- }
-#endif
+ iommu_free_pgd(d);
return_devices_to_dom0(d);
}
@@ -1621,8 +1588,8 @@ int intel_iommu_map_page(
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
- struct dma_pte *pte = NULL;
- struct page_info *pg = NULL;
+ struct dma_pte *page = NULL, *pte = NULL;
+ u64 pg_maddr;
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
@@ -1633,15 +1600,15 @@ int intel_iommu_map_page(
return 0;
#endif
- pg = addr_to_dma_page(d, (paddr_t)gfn << PAGE_SHIFT_4K);
- if ( !pg )
+ pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
+ if ( pg_maddr == 0 )
return -ENOMEM;
- pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
- pte += gfn & LEVEL_MASK;
+ page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+ pte = page + (gfn & LEVEL_MASK);
dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
iommu_flush_cache_entry(iommu, pte);
- unmap_domain_page(pte);
+ unmap_vtd_domain_page(page);
for_each_drhd_unit ( drhd )
{
@@ -1681,9 +1648,9 @@ int iommu_page_mapping(struct domain *domain, paddr_t iova,
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
unsigned long start_pfn, end_pfn;
- struct dma_pte *pte = NULL;
+ struct dma_pte *page = NULL, *pte = NULL;
int index;
- struct page_info *pg = NULL;
+ u64 pg_maddr;
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
@@ -1696,15 +1663,15 @@ int iommu_page_mapping(struct domain *domain, paddr_t iova,
index = 0;
while ( start_pfn < end_pfn )
{
- pg = addr_to_dma_page(domain, iova + PAGE_SIZE_4K * index);
- if ( !pg )
+ pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
+ if ( pg_maddr == 0 )
return -ENOMEM;
- pte = (struct dma_pte *)map_domain_page(page_to_mfn(pg));
- pte += start_pfn & LEVEL_MASK;
+ page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
+ pte = page + (start_pfn & LEVEL_MASK);
dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, prot);
iommu_flush_cache_entry(iommu, pte);
- unmap_domain_page(pte);
+ unmap_vtd_domain_page(page);
start_pfn++;
index++;
}
@@ -1814,7 +1781,7 @@ void __init setup_dom0_devices(void)
{
for ( func = 0; func < 8; func++ )
{
- l = read_pci_config(bus, dev, func, PCI_VENDOR_ID);
+ l = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID);
/* some broken boards return 0 or ~0 if a slot is empty: */
if ( (l == 0xffffffff) || (l == 0x00000000) ||
(l == 0x0000ffff) || (l == 0xffff0000) )
@@ -1897,7 +1864,11 @@ static int init_vtd2_hw(void)
if ( qinval_setup(iommu) != 0 )
dprintk(XENLOG_ERR VTDPREFIX,
"Queued Invalidation hardware not found\n");
+ }
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
if ( intremap_setup(iommu) != 0 )
dprintk(XENLOG_ERR VTDPREFIX,
"Interrupt Remapping hardware not found\n");
@@ -1947,7 +1918,7 @@ int iommu_setup(void)
INIT_LIST_HEAD(&hd->pdev_list);
/* setup clflush size */
- x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
+ clflush_size = get_clflush_size();
/* Allocate IO page directory page for the domain. */
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
@@ -2038,129 +2009,6 @@ int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
return ret;
}
-void iommu_set_pgd(struct domain *d)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(d);
- unsigned long p2m_table;
-
- if ( hd->pgd )
- {
- gdprintk(XENLOG_INFO VTDPREFIX,
- "iommu_set_pgd_1: hd->pgd = %p\n", hd->pgd);
- hd->pgd = NULL;
- }
- p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
-
-#if CONFIG_PAGING_LEVELS == 3
- if ( !hd->pgd )
- {
- int level = agaw_to_level(hd->agaw);
- struct dma_pte *pmd = NULL;
- struct dma_pte *pgd = NULL;
- struct dma_pte *pte = NULL;
- l3_pgentry_t *l3e;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&hd->mapping_lock, flags);
- if ( !hd->pgd )
- {
- pgd = (struct dma_pte *)alloc_xenheap_page();
- if ( !pgd )
- {
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
- gdprintk(XENLOG_ERR VTDPREFIX,
- "Allocate pgd memory failed!\n");
- return;
- }
- memset(pgd, 0, PAGE_SIZE);
- hd->pgd = pgd;
- }
-
- l3e = map_domain_page(p2m_table);
- switch ( level )
- {
- case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
- /* We only support 8 entries for the PAE L3 p2m table */
- for ( i = 0; i < 8 ; i++ )
- {
- /* Don't create new L2 entry, use ones from p2m table */
- pgd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
- }
- break;
-
- case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
- /* We allocate one more page for the top vtd page table. */
- pmd = (struct dma_pte *)alloc_xenheap_page();
- if ( !pmd )
- {
- unmap_domain_page(l3e);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
- gdprintk(XENLOG_ERR VTDPREFIX,
- "Allocate pmd memory failed!\n");
- return;
- }
- memset((u8*)pmd, 0, PAGE_SIZE);
- pte = &pgd[0];
- dma_set_pte_addr(*pte, virt_to_maddr(pmd));
- dma_set_pte_readable(*pte);
- dma_set_pte_writable(*pte);
-
- for ( i = 0; i < 8; i++ )
- {
- /* Don't create new L2 entry, use ones from p2m table */
- pmd[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
- }
- break;
- default:
- gdprintk(XENLOG_ERR VTDPREFIX,
- "iommu_set_pgd:Unsupported p2m table sharing level!\n");
- break;
- }
- unmap_domain_page(l3e);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
- }
-#elif CONFIG_PAGING_LEVELS == 4
- if ( !hd->pgd )
- {
- int level = agaw_to_level(hd->agaw);
- l3_pgentry_t *l3e;
- mfn_t pgd_mfn;
-
- switch ( level )
- {
- case VTD_PAGE_TABLE_LEVEL_3:
- l3e = map_domain_page(p2m_table);
- if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
- {
- gdprintk(XENLOG_ERR VTDPREFIX,
- "iommu_set_pgd: second level wasn't there\n");
- unmap_domain_page(l3e);
- return;
- }
- pgd_mfn = _mfn(l3e_get_pfn(*l3e));
- unmap_domain_page(l3e);
- hd->pgd = maddr_to_virt(pagetable_get_paddr(
- pagetable_from_mfn(pgd_mfn)));
- break;
-
- case VTD_PAGE_TABLE_LEVEL_4:
- pgd_mfn = _mfn(p2m_table);
- hd->pgd = maddr_to_virt(pagetable_get_paddr(
- pagetable_from_mfn(pgd_mfn)));
- break;
- default:
- gdprintk(XENLOG_ERR VTDPREFIX,
- "iommu_set_pgd:Unsupported p2m table sharing level!\n");
- break;
- }
- }
-#endif
- gdprintk(XENLOG_INFO VTDPREFIX,
- "iommu_set_pgd: hd->pgd = %p\n", hd->pgd);
-}
-
-
u8 iommu_state[MAX_IOMMU_REGS * MAX_IOMMUS];
int iommu_suspend(void)
{
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index 82593b455a..56bc550a4f 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -425,7 +425,7 @@ extern struct list_head acpi_rmrr_units;
extern struct list_head acpi_ioapic_units;
struct qi_ctrl {
- struct qinval_entry *qinval; /* queue invalidation page */
+ u64 qinval_maddr; /* queue invalidation page machine address */
int qinval_index; /* queue invalidation index */
spinlock_t qinval_lock; /* lock for queue invalidation page */
spinlock_t qinval_poll_lock; /* lock for queue invalidation poll addr */
@@ -433,7 +433,7 @@ struct qi_ctrl {
};
struct ir_ctrl {
- struct iremap_entry *iremap; /* interrupt remap table */
+ u64 iremap_maddr; /* interrupt remap table machine address */
int iremap_index; /* interrupt remap index */
spinlock_t iremap_lock; /* lock for irq remappping table */
};
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index e7aa6cea83..21025986ff 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -21,10 +21,11 @@
#include <xen/sched.h>
#include <xen/iommu.h>
+#include <xen/time.h>
+#include <xen/pci.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
-#include "../pci-direct.h"
#include "../pci_regs.h"
#include "msi.h"
#include "extern.h"
@@ -63,13 +64,14 @@ static int qinval_update_qtail(struct iommu *iommu, int index)
static int gen_cc_inv_dsc(struct iommu *iommu, int index,
u16 did, u16 source_id, u8 function_mask, u8 granu)
{
- u64 *ptr64;
unsigned long flags;
- struct qinval_entry * qinval_entry = NULL;
+ struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
- qinval_entry = &qi_ctrl->qinval[index];
+ qinval_entries =
+ (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+ qinval_entry = &qinval_entries[index];
qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
qinval_entry->q.cc_inv_dsc.lo.granu = granu;
qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -78,9 +80,10 @@ static int gen_cc_inv_dsc(struct iommu *iommu, int index,
qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
qinval_entry->q.cc_inv_dsc.hi.res = 0;
+
+ unmap_vtd_domain_page(qinval_entries);
spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
- ptr64 = (u64 *)qinval_entry;
return 0;
}
@@ -93,7 +96,7 @@ int queue_invalidate_context(struct iommu *iommu,
spin_lock_irqsave(&iommu->register_lock, flags);
index = qinval_next_index(iommu);
- if (index == -1)
+ if ( index == -1 )
return -EBUSY;
ret = gen_cc_inv_dsc(iommu, index, did, source_id,
function_mask, granu);
@@ -106,14 +109,16 @@ static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
{
unsigned long flags;
- struct qinval_entry * qinval_entry = NULL;
+ struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
if ( index == -1 )
return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
- qinval_entry = &qi_ctrl->qinval[index];
+ qinval_entries =
+ (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+ qinval_entry = &qinval_entries[index];
qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
@@ -127,6 +132,7 @@ static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
+ unmap_vtd_domain_page(qinval_entries);
spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
return 0;
}
@@ -151,15 +157,16 @@ int queue_invalidate_iotlb(struct iommu *iommu,
static int gen_wait_dsc(struct iommu *iommu, int index,
u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
{
- u64 *ptr64;
unsigned long flags;
- struct qinval_entry * qinval_entry = NULL;
+ struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
if ( index == -1 )
return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
- qinval_entry = &qi_ctrl->qinval[index];
+ qinval_entries =
+ (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+ qinval_entry = &qinval_entries[index];
qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -168,8 +175,8 @@ static int gen_wait_dsc(struct iommu *iommu, int index,
qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
+ unmap_vtd_domain_page(qinval_entries);
spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
- ptr64 = (u64 *)qinval_entry;
return 0;
}
@@ -177,7 +184,7 @@ static int queue_invalidate_wait(struct iommu *iommu,
u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
{
unsigned long flags;
- unsigned long start_time;
+ s_time_t start_time;
int index = -1;
int ret = -1;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
@@ -185,7 +192,7 @@ static int queue_invalidate_wait(struct iommu *iommu,
spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
spin_lock_irqsave(&iommu->register_lock, flags);
index = qinval_next_index(iommu);
- if (*saddr == 1)
+ if ( *saddr == 1 )
*saddr = 0;
ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
ret |= qinval_update_qtail(iommu, index);
@@ -195,9 +202,11 @@ static int queue_invalidate_wait(struct iommu *iommu,
if ( sw )
{
/* In case all wait descriptor writes to same addr with same data */
- start_time = jiffies;
- while ( *saddr != 1 ) {
- if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT)) {
+ start_time = NOW();
+ while ( *saddr != 1 )
+ {
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ {
print_qi_regs(iommu);
panic("queue invalidate wait descriptor was not executed\n");
}
@@ -213,7 +222,7 @@ int invalidate_sync(struct iommu *iommu)
int ret = -1;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
- if (qi_ctrl->qinval)
+    if ( qi_ctrl->qinval_maddr != 0 )
{
ret = queue_invalidate_wait(iommu,
0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
@@ -226,14 +235,16 @@ static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
u32 max_invs_pend, u16 sid, u16 size, u64 addr)
{
unsigned long flags;
- struct qinval_entry * qinval_entry = NULL;
+ struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
if ( index == -1 )
return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
- qinval_entry = &qi_ctrl->qinval[index];
+ qinval_entries =
+ (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+ qinval_entry = &qinval_entries[index];
qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -244,6 +255,7 @@ static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
+ unmap_vtd_domain_page(qinval_entries);
spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
return 0;
}
@@ -268,14 +280,16 @@ static int gen_iec_inv_dsc(struct iommu *iommu, int index,
u8 granu, u8 im, u16 iidx)
{
unsigned long flags;
- struct qinval_entry * qinval_entry = NULL;
+ struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
if ( index == -1 )
return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
- qinval_entry = &qi_ctrl->qinval[index];
+ qinval_entries =
+ (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+ qinval_entry = &qinval_entries[index];
qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
qinval_entry->q.iec_inv_dsc.lo.granu = granu;
qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -284,6 +298,7 @@ static int gen_iec_inv_dsc(struct iommu *iommu, int index,
qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
qinval_entry->q.iec_inv_dsc.hi.res = 0;
+ unmap_vtd_domain_page(qinval_entries);
spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
return 0;
}
@@ -349,7 +364,7 @@ static int flush_context_qi(
did = 0;
}
- if (qi_ctrl->qinval)
+ if ( qi_ctrl->qinval_maddr != 0 )
{
ret = queue_invalidate_context(iommu, did, sid, fm,
type >> DMA_CCMD_INVL_GRANU_OFFSET);
@@ -382,7 +397,8 @@ static int flush_iotlb_qi(
did = 0;
}
- if (qi_ctrl->qinval) {
+ if ( qi_ctrl->qinval_maddr != 0 )
+ {
/* use queued invalidation */
if (cap_write_drain(iommu->cap))
dw = 1;
@@ -399,8 +415,7 @@ static int flush_iotlb_qi(
int qinval_setup(struct iommu *iommu)
{
- unsigned long start_time;
- u64 paddr;
+ s_time_t start_time;
u32 status = 0;
struct qi_ctrl *qi_ctrl;
struct iommu_flush *flush;
@@ -411,15 +426,14 @@ int qinval_setup(struct iommu *iommu)
if ( !ecap_queued_inval(iommu->ecap) )
return -ENODEV;
- if (qi_ctrl->qinval == NULL) {
- qi_ctrl->qinval = alloc_xenheap_page();
- if (qi_ctrl->qinval == NULL)
- panic("Cannot allocate memory for qi_ctrl->qinval\n");
- memset((u8*)qi_ctrl->qinval, 0, PAGE_SIZE_4K);
+ if ( qi_ctrl->qinval_maddr == 0 )
+ {
+ qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
+ if ( qi_ctrl->qinval_maddr == 0 )
+ panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
flush->context = flush_context_qi;
flush->iotlb = flush_iotlb_qi;
}
- paddr = virt_to_maddr(qi_ctrl->qinval);
/* Setup Invalidation Queue Address(IQA) register with the
* address of the page we just allocated. QS field at
@@ -428,19 +442,20 @@ int qinval_setup(struct iommu *iommu)
* registers are automatically reset to 0 with write
* to IQA register.
*/
- dmar_writeq(iommu->reg, DMAR_IQA_REG, paddr);
+ dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
/* enable queued invalidation hardware */
iommu->gcmd |= DMA_GCMD_QIE;
dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
/* Make sure hardware complete it */
- start_time = jiffies;
- while (1) {
+ start_time = NOW();
+ for ( ; ; )
+ {
status = dmar_readl(iommu->reg, DMAR_GSTS_REG);
- if (status & DMA_GSTS_QIES)
+ if ( status & DMA_GSTS_QIES )
break;
- if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
panic("Cannot set QIE field for queue invalidation\n");
cpu_relax();
}
diff --git a/xen/drivers/passthrough/vtd/utils.c b/xen/drivers/passthrough/vtd/utils.c
index 59de4a02e8..3c33a2c300 100644
--- a/xen/drivers/passthrough/vtd/utils.c
+++ b/xen/drivers/passthrough/vtd/utils.c
@@ -20,11 +20,13 @@
#include <xen/sched.h>
#include <xen/delay.h>
#include <xen/iommu.h>
+#include <xen/time.h>
+#include <xen/pci.h>
#include "iommu.h"
#include "dmar.h"
-#include "../pci-direct.h"
#include "../pci_regs.h"
#include "msi.h"
+#include "vtd.h"
#define INTEL 0x8086
#define SEABURG 0x4000
@@ -35,7 +37,7 @@ int is_usb_device(struct pci_dev *pdev)
u8 bus = pdev->bus;
u8 dev = PCI_SLOT(pdev->devfn);
u8 func = PCI_FUNC(pdev->devfn);
- u16 class = read_pci_config_16(bus, dev, func, PCI_CLASS_DEVICE);
+ u16 class = pci_conf_read16(bus, dev, func, PCI_CLASS_DEVICE);
return (class == 0xc03);
}
@@ -44,9 +46,9 @@ int vtd_hw_check(void)
u16 vendor, device;
u8 revision, stepping;
- vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
- device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
- revision = read_pci_config_byte(0, 0, 0, PCI_REVISION_ID);
+ vendor = pci_conf_read16(0, 0, 0, PCI_VENDOR_ID);
+ device = pci_conf_read16(0, 0, 0, PCI_DEVICE_ID);
+ revision = pci_conf_read8(0, 0, 0, PCI_REVISION_ID);
stepping = revision & 0xf;
if ( (vendor == INTEL) && (device == SEABURG) )
@@ -68,7 +70,7 @@ int vtd_hw_check(void)
/* Disable vt-d protected memory registers. */
void disable_pmr(struct iommu *iommu)
{
- unsigned long start_time;
+ s_time_t start_time;
unsigned int val;
val = dmar_readl(iommu->reg, DMAR_PMEN_REG);
@@ -76,7 +78,7 @@ void disable_pmr(struct iommu *iommu)
return;
dmar_writel(iommu->reg, DMAR_PMEN_REG, val & ~DMA_PMEN_EPM);
- start_time = jiffies;
+ start_time = NOW();
for ( ; ; )
{
@@ -84,7 +86,7 @@ void disable_pmr(struct iommu *iommu)
if ( (val & DMA_PMEN_PRS) == 0 )
break;
- if ( time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT) )
+ if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
panic("Disable PMRs timeout\n");
cpu_relax();
@@ -101,18 +103,18 @@ static u8 find_cap_offset(u8 bus, u8 dev, u8 func, u8 cap)
u8 pos = PCI_CAPABILITY_LIST;
u16 status;
- status = read_pci_config_16(bus, dev, func, PCI_STATUS);
+ status = pci_conf_read16(bus, dev, func, PCI_STATUS);
if ( (status & PCI_STATUS_CAP_LIST) == 0 )
return 0;
while ( max_cap-- )
{
- pos = read_pci_config_byte(bus, dev, func, pos);
+ pos = pci_conf_read8(bus, dev, func, pos);
if ( pos < 0x40 )
break;
pos &= ~3;
- id = read_pci_config_byte(bus, dev, func, pos + PCI_CAP_LIST_ID);
+ id = pci_conf_read8(bus, dev, func, pos + PCI_CAP_LIST_ID);
if ( id == 0xff )
break;
@@ -141,13 +143,13 @@ void pdev_flr(u8 bus, u8 devfn)
pos = find_cap_offset(bus, dev, func, PCI_CAP_ID_EXP);
if ( pos != 0 )
{
- dev_cap = read_pci_config(bus, dev, func, pos + PCI_EXP_DEVCAP);
+ dev_cap = pci_conf_read32(bus, dev, func, pos + PCI_EXP_DEVCAP);
if ( dev_cap & PCI_EXP_DEVCAP_FLR )
{
- write_pci_config(bus, dev, func,
+ pci_conf_write32(bus, dev, func,
pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_FLR);
do {
- dev_status = read_pci_config(bus, dev, func,
+ dev_status = pci_conf_read32(bus, dev, func,
pos + PCI_EXP_DEVSTA);
} while ( dev_status & PCI_EXP_DEVSTA_TRPND );
@@ -167,23 +169,23 @@ void pdev_flr(u8 bus, u8 devfn)
int i;
u32 config[PCI_CONFIG_DWORD_SIZE];
for ( i = 0; i < PCI_CONFIG_DWORD_SIZE; i++ )
- config[i] = read_pci_config(bus, dev, func, i*4);
+ config[i] = pci_conf_read32(bus, dev, func, i*4);
/* Enter D3hot without soft reset */
- pm_ctl = read_pci_config(bus, dev, func, pos + PCI_PM_CTRL);
+ pm_ctl = pci_conf_read32(bus, dev, func, pos + PCI_PM_CTRL);
pm_ctl |= PCI_PM_CTRL_NO_SOFT_RESET;
pm_ctl &= ~PCI_PM_CTRL_STATE_MASK;
pm_ctl |= PCI_D3hot;
- write_pci_config(bus, dev, func, pos + PCI_PM_CTRL, pm_ctl);
+ pci_conf_write32(bus, dev, func, pos + PCI_PM_CTRL, pm_ctl);
mdelay(10);
/* From D3hot to D0 */
- write_pci_config(bus, dev, func, pos + PCI_PM_CTRL, 0);
+ pci_conf_write32(bus, dev, func, pos + PCI_PM_CTRL, 0);
mdelay(10);
/* Write saved configurations to device */
for ( i = 0; i < PCI_CONFIG_DWORD_SIZE; i++ )
- write_pci_config(bus, dev, func, i*4, config[i]);
+ pci_conf_write32(bus, dev, func, i*4, config[i]);
flr = 1;
}
@@ -243,7 +245,7 @@ u32 get_level_index(unsigned long gmfn, int level)
}
void print_vtd_entries(
- struct domain *d,
+ struct domain *d,
struct iommu *iommu,
int bus, int devfn,
unsigned long gmfn)
@@ -261,37 +263,40 @@ void print_vtd_entries(
printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
- if ( hd->pgd == NULL )
+ if ( hd->pgd_maddr == 0 )
{
- printk(" hg->pgd == NULL\n");
+ printk(" hd->pgd_maddr == 0\n");
return;
}
- printk(" d->pgd = %p virt_to_maddr(hd->pgd) = %lx\n",
- hd->pgd, virt_to_maddr(hd->pgd));
+ printk(" hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
for_each_drhd_unit ( drhd )
{
printk("---- print_vtd_entries %d ----\n", i++);
- root_entry = iommu->root_entry;
- if ( root_entry == NULL )
+ if ( iommu->root_maddr == 0 )
{
- printk(" root_entry == NULL\n");
+ printk(" iommu->root_maddr = 0\n");
continue;
}
+ root_entry =
+ (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
+
printk(" root_entry = %p\n", root_entry);
printk(" root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
if ( !root_present(root_entry[bus]) )
{
+ unmap_vtd_domain_page(root_entry);
printk(" root_entry[%x] not present\n", bus);
continue;
}
ctxt_entry =
- maddr_to_virt((root_entry[bus].val >> PAGE_SHIFT) << PAGE_SHIFT);
+ (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
if ( ctxt_entry == NULL )
{
+ unmap_vtd_domain_page(root_entry);
printk(" ctxt_entry == NULL\n");
continue;
}
@@ -301,6 +306,8 @@ void print_vtd_entries(
devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
if ( !context_present(ctxt_entry[devfn]) )
{
+ unmap_vtd_domain_page(ctxt_entry);
+ unmap_vtd_domain_page(root_entry);
printk(" ctxt_entry[%x] not present\n", devfn);
continue;
}
@@ -308,6 +315,8 @@ void print_vtd_entries(
if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
level != VTD_PAGE_TABLE_LEVEL_4)
{
+ unmap_vtd_domain_page(ctxt_entry);
+ unmap_vtd_domain_page(root_entry);
printk("Unsupported VTD page table level (%d)!\n", level);
continue;
}
@@ -319,6 +328,8 @@ void print_vtd_entries(
printk(" l%d = %p\n", level, l);
if ( l == NULL )
{
+ unmap_vtd_domain_page(ctxt_entry);
+ unmap_vtd_domain_page(root_entry);
printk(" l%d == NULL\n", level);
break;
}
@@ -329,6 +340,8 @@ void print_vtd_entries(
pte.val = l[l_index];
if ( !dma_pte_present(pte) )
{
+ unmap_vtd_domain_page(ctxt_entry);
+ unmap_vtd_domain_page(root_entry);
printk(" l%d[%x] not present\n", level, l_index);
break;
}
diff --git a/xen/drivers/passthrough/vtd/vtd.h b/xen/drivers/passthrough/vtd/vtd.h
index 38805c38ea..8e3fbb3bef 100644
--- a/xen/drivers/passthrough/vtd/vtd.h
+++ b/xen/drivers/passthrough/vtd/vtd.h
@@ -42,4 +42,13 @@ struct IO_APIC_route_remap_entry {
};
};
+unsigned int get_clflush_size(void);
+u64 alloc_pgtable_maddr(void);
+void free_pgtable_maddr(u64 maddr);
+void *map_vtd_domain_page(u64 maddr);
+void unmap_vtd_domain_page(void *va);
+
+void iommu_flush_cache_entry(struct iommu *iommu, void *addr);
+void iommu_flush_cache_page(struct iommu *iommu, void *addr);
+
#endif // _VTD_H_
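
The vtd.h additions above expose the map/unmap helpers that the reworked print_vtd_entries() already relies on: tables are referenced by machine address and mapped only for the duration of an access. A minimal sketch of that pairing, assuming it is added to one of the VT-d source files so struct iommu, struct root_entry and root_present() are in scope:

/* Sketch: report whether the root entry for 'bus' is present, using the
 * map/unmap pairing introduced above.  Purely illustrative. */
static int demo_root_entry_present(struct iommu *iommu, int bus)
{
    struct root_entry *root_entry;
    int present;

    if ( iommu->root_maddr == 0 )
        return 0;

    root_entry = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
    present = root_present(root_entry[bus]);
    unmap_vtd_domain_page(root_entry);

    return present;
}
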
diff --git a/xen/drivers/passthrough/vtd/x86/Makefile b/xen/drivers/passthrough/vtd/x86/Makefile
new file mode 100644
index 0000000000..85243e3aa7
--- /dev/null
+++ b/xen/drivers/passthrough/vtd/x86/Makefile
@@ -0,0 +1 @@
+obj-y += vtd.o
diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c
new file mode 100644
index 0000000000..22f2e23f8f
--- /dev/null
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@intel.com>
+ * Copyright (C) Weidong Han <weidong.han@intel.com>
+ */
+
+#include <xen/sched.h>
+#include <xen/domain_page.h>
+#include <asm/paging.h>
+#include <xen/iommu.h>
+#include "../iommu.h"
+#include "../dmar.h"
+#include "../vtd.h"
+
+void *map_vtd_domain_page(u64 maddr)
+{
+ return map_domain_page(maddr >> PAGE_SHIFT_4K);
+}
+
+void unmap_vtd_domain_page(void *va)
+{
+ unmap_domain_page(va);
+}
+
+/* Allocate page table, return its machine address */
+u64 alloc_pgtable_maddr(void)
+{
+ struct page_info *pg;
+ u64 *vaddr;
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;
+
+ pg = alloc_domheap_page(NULL, 0);
+ vaddr = map_domain_page(page_to_mfn(pg));
+ if ( !vaddr )
+ return 0;
+ memset(vaddr, 0, PAGE_SIZE);
+
+ drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
+ iommu = drhd->iommu;
+ iommu_flush_cache_page(iommu, vaddr);
+ unmap_domain_page(vaddr);
+
+ return page_to_maddr(pg);
+}
+
+void free_pgtable_maddr(u64 maddr)
+{
+ if ( maddr != 0 )
+ free_domheap_page(maddr_to_page(maddr));
+}
+
+unsigned int get_clflush_size(void)
+{
+ return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
+}
+
+struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain)
+{
+ if ( !domain )
+ return NULL;
+
+ return domain->arch.hvm_domain.irq.dpci;
+}
+
+int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci)
+{
+ if ( !domain || !dpci )
+ return 0;
+
+ domain->arch.hvm_domain.irq.dpci = dpci;
+ return 1;
+}
+
+void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+ struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
+ struct dev_intx_gsi_link *digl, *tmp;
+ int i;
+
+ ASSERT(isairq < NR_ISAIRQS);
+ if ( !vtd_enabled || !dpci ||
+ !test_bit(isairq, dpci->isairq_map) )
+ return;
+
+ /* Multiple mirq may be mapped to one isa irq */
+ for ( i = 0; i < NR_IRQS; i++ )
+ {
+ if ( !dpci->mirq[i].valid )
+ continue;
+
+ list_for_each_entry_safe ( digl, tmp,
+ &dpci->mirq[i].digl_list, list )
+ {
+ if ( hvm_irq->pci_link.route[digl->link] == isairq )
+ {
+ hvm_pci_intx_deassert(d, digl->device, digl->intx);
+ spin_lock(&dpci->dirq_lock);
+ if ( --dpci->mirq[i].pending == 0 )
+ {
+ spin_unlock(&dpci->dirq_lock);
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "hvm_dpci_isairq_eoi:: mirq = %x\n", i);
+ stop_timer(&dpci->hvm_timer[irq_to_vector(i)]);
+ pirq_guest_eoi(d, i);
+ }
+ else
+ spin_unlock(&dpci->dirq_lock);
+ }
+ }
+ }
+}
+
+void iommu_set_pgd(struct domain *d)
+{
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ unsigned long p2m_table;
+ int level = agaw_to_level(hd->agaw);
+ l3_pgentry_t *l3e;
+
+ p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
+
+ if ( paging_mode_hap(d) )
+ {
+ int level = agaw_to_level(hd->agaw);
+ struct dma_pte *dpte = NULL;
+ mfn_t pgd_mfn;
+
+ switch ( level )
+ {
+ case VTD_PAGE_TABLE_LEVEL_3:
+ dpte = map_domain_page(p2m_table);
+ if ( !dma_pte_present(*dpte) )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd: second level wasn't there\n");
+ unmap_domain_page(dpte);
+ return;
+ }
+ pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
+ hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+ unmap_domain_page(dpte);
+ break;
+ case VTD_PAGE_TABLE_LEVEL_4:
+ pgd_mfn = _mfn(p2m_table);
+ hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+ break;
+ }
+ }
+ else
+ {
+#if CONFIG_PAGING_LEVELS == 3
+ struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
+ int i;
+ u64 pmd_maddr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hd->mapping_lock, flags);
+ hd->pgd_maddr = alloc_pgtable_maddr();
+ if ( hd->pgd_maddr == 0 )
+ {
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "Allocate pgd memory failed!\n");
+ return;
+ }
+
+ pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
+ l3e = map_domain_page(p2m_table);
+ switch ( level )
+ {
+ case VTD_PAGE_TABLE_LEVEL_3: /* Weybridge */
+ /* We only support 8 entries for the PAE L3 p2m table */
+ for ( i = 0; i < 8 ; i++ )
+ {
+ /* Don't create new L2 entry, use ones from p2m table */
+ pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+ }
+ break;
+
+ case VTD_PAGE_TABLE_LEVEL_4: /* Stoakley */
+ /* We allocate one more page for the top vtd page table. */
+ pmd_maddr = alloc_pgtable_maddr();
+ if ( pmd_maddr == 0 )
+ {
+ unmap_vtd_domain_page(pgd_vaddr);
+ unmap_domain_page(l3e);
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "Allocate pmd memory failed!\n");
+ return;
+ }
+
+ pte = &pgd_vaddr[0];
+ dma_set_pte_addr(*pte, pmd_maddr);
+ dma_set_pte_readable(*pte);
+ dma_set_pte_writable(*pte);
+
+ pmd_vaddr = map_vtd_domain_page(pmd_maddr);
+ for ( i = 0; i < 8; i++ )
+ {
+ /* Don't create new L2 entry, use ones from p2m table */
+ pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
+ }
+
+ unmap_vtd_domain_page(pmd_vaddr);
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+ break;
+ }
+
+ unmap_vtd_domain_page(pgd_vaddr);
+ unmap_domain_page(l3e);
+ spin_unlock_irqrestore(&hd->mapping_lock, flags);
+
+#elif CONFIG_PAGING_LEVELS == 4
+ mfn_t pgd_mfn;
+
+ switch ( level )
+ {
+ case VTD_PAGE_TABLE_LEVEL_3:
+ l3e = map_domain_page(p2m_table);
+ if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd: second level wasn't there\n");
+ unmap_domain_page(l3e);
+ return;
+ }
+
+ pgd_mfn = _mfn(l3e_get_pfn(*l3e));
+ hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+ unmap_domain_page(l3e);
+ break;
+ case VTD_PAGE_TABLE_LEVEL_4:
+ pgd_mfn = _mfn(p2m_table);
+ hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "iommu_set_pgd:Unsupported p2m table sharing level!\n");
+ break;
+ }
+#endif
+ }
+}
+
+void iommu_free_pgd(struct domain *d)
+{
+#if CONFIG_PAGING_LEVELS == 3
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ int level = agaw_to_level(hd->agaw);
+ struct dma_pte *pgd_vaddr = NULL;
+
+ switch ( level )
+ {
+ case VTD_PAGE_TABLE_LEVEL_3:
+ if ( hd->pgd_maddr != 0 )
+ {
+ free_pgtable_maddr(hd->pgd_maddr);
+ hd->pgd_maddr = 0;
+ }
+ break;
+ case VTD_PAGE_TABLE_LEVEL_4:
+ if ( hd->pgd_maddr != 0 )
+ {
+ pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
+ if ( pgd_vaddr[0].val != 0 )
+ free_pgtable_maddr(pgd_vaddr[0].val);
+ unmap_vtd_domain_page(pgd_vaddr);
+ free_pgtable_maddr(hd->pgd_maddr);
+ hd->pgd_maddr = 0;
+ }
+ break;
+ default:
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "Unsupported p2m table sharing level!\n");
+ break;
+ }
+#endif
+}
+
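
alloc_pgtable_maddr() above returns a machine address rather than a virtual pointer, so writers of VT-d tables follow the same allocate/map/fill/unmap sequence that iommu_set_pgd() uses for its extra PAE level. A hedged sketch with a hypothetical helper name, assuming the dma_pte accessors used above are in scope:

/* Sketch (hypothetical helper): allocate one page-table page and point its
 * first entry at 'maddr', mirroring the pattern in iommu_set_pgd(). */
static u64 demo_alloc_and_link(u64 maddr)
{
    u64 pt_maddr = alloc_pgtable_maddr();
    struct dma_pte *pt;

    if ( pt_maddr == 0 )
        return 0;

    pt = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
    dma_set_pte_addr(pt[0], maddr);
    dma_set_pte_readable(pt[0]);
    dma_set_pte_writable(pt[0]);
    unmap_vtd_domain_page(pt);

    return pt_maddr;
}
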
diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h
index 7bbd56f7c6..db05a1119d 100644
--- a/xen/include/asm-ia64/domain.h
+++ b/xen/include/asm-ia64/domain.h
@@ -18,6 +18,8 @@ struct p2m_entry;
struct tlb_track;
#endif
+extern unsigned long volatile jiffies;
+
struct vcpu;
extern void relinquish_vcpu_resources(struct vcpu *v);
extern int vcpu_late_initialise(struct vcpu *v);
diff --git a/xen/include/asm-ia64/linux-xen/asm/sn/README.origin b/xen/include/asm-ia64/linux-xen/asm/sn/README.origin
index 762d1f5a9c..ba80c66319 100644
--- a/xen/include/asm-ia64/linux-xen/asm/sn/README.origin
+++ b/xen/include/asm-ia64/linux-xen/asm/sn/README.origin
@@ -12,5 +12,6 @@ intr.h -> linux/include/asm-ia64/sn/intr.h
io.h -> linux/include/asm-ia64/sn/io.h
nodepda.h -> linux/include/asm-ia64/sn/nodepda.h
pcibr_provider.h -> linux/include/asm-ia64/sn/pcibr_provider.h
+pcidev.h -> linux/include/asm-ia64/sn/pcidev.h
rw_mmr.h -> linux/include/asm-ia64/sn/rw_mmr.h
types.h -> linux/include/asm-ia64/sn/types.h
diff --git a/xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h b/xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h
index 8f88ecd3a7..dc953eb2f4 100644
--- a/xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h
+++ b/xen/include/asm-ia64/linux-xen/asm/sn/pcibr_provider.h
@@ -10,7 +10,7 @@
#ifdef XEN
#include <linux/spinlock.h>
-#include <linux/pci.h>
+#include <linux/linux-pci.h>
#endif
#include <asm/sn/intr.h>
#include <asm/sn/pcibus_provider_defs.h>
diff --git a/xen/include/asm-ia64/linux/asm/sn/pcidev.h b/xen/include/asm-ia64/linux-xen/asm/sn/pcidev.h
index eac3561574..17ae495e6a 100644
--- a/xen/include/asm-ia64/linux/asm/sn/pcidev.h
+++ b/xen/include/asm-ia64/linux-xen/asm/sn/pcidev.h
@@ -8,7 +8,11 @@
#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
#define _ASM_IA64_SN_PCI_PCIDEV_H
+#ifdef XEN
+#include <linux/linux-pci.h>
+#else
#include <linux/pci.h>
+#endif
/*
* In ia64, pci_dev->sysdata must be a *pci_controller. To provide access to
diff --git a/xen/include/asm-ia64/linux-xen/linux/interrupt.h b/xen/include/asm-ia64/linux-xen/linux/interrupt.h
index d6bbeb7772..6f1aece436 100644
--- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h
+++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h
@@ -144,6 +144,7 @@ extern void FASTCALL(raise_softirq(unsigned int nr));
#endif
+#ifndef XEN
/* Tasklets --- multithreaded analogue of BHs.
Main feature differing them of generic softirqs: tasklet
@@ -254,6 +255,7 @@ extern void tasklet_kill(struct tasklet_struct *t);
extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu);
extern void tasklet_init(struct tasklet_struct *t,
void (*func)(unsigned long), unsigned long data);
+#endif
/*
* Autoprobing for irqs:
diff --git a/xen/include/asm-ia64/linux-xen/linux/pci.h b/xen/include/asm-ia64/linux-xen/linux/linux-pci.h
index 118201a9a7..118201a9a7 100644
--- a/xen/include/asm-ia64/linux-xen/linux/pci.h
+++ b/xen/include/asm-ia64/linux-xen/linux/linux-pci.h
diff --git a/xen/include/asm-ia64/linux/asm/sn/README.origin b/xen/include/asm-ia64/linux/asm/sn/README.origin
index 7a4f1c645d..495171ee81 100644
--- a/xen/include/asm-ia64/linux/asm/sn/README.origin
+++ b/xen/include/asm-ia64/linux/asm/sn/README.origin
@@ -10,7 +10,6 @@ l1.h -> linux/include/asm-ia64/sn/l1.h
leds.h -> linux/include/asm-ia64/sn/leds.h
module.h -> linux/include/asm-ia64/sn/module.h
pcibus_provider_defs.h -> linux/include/asm-ia64/sn/pcibus_provider_defs.h
-pcidev.h -> linux/include/asm-ia64/sn/pcidev.h
pda.h -> linux/include/asm-ia64/sn/pda.h
pic.h -> linux/include/asm-ia64/sn/pic.h
shub_mmr.h -> linux/include/asm-ia64/sn/shub_mmr.h
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 42b8da7b36..99281c68a3 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -138,27 +138,6 @@ struct hap_domain {
};
/************************************************/
-/* p2m handling */
-/************************************************/
-struct p2m_domain {
- /* Lock that protects updates to the p2m */
- spinlock_t lock;
- int locker; /* processor which holds the lock */
- const char *locker_function; /* Func that took it */
-
- /* Pages used to construct the p2m */
- struct list_head pages;
-
- /* Functions to call to get or free pages for the p2m */
- struct page_info * (*alloc_page )(struct domain *d);
- void (*free_page )(struct domain *d,
- struct page_info *pg);
-
- /* Highest guest frame that's ever been mapped in the p2m */
- unsigned long max_mapped_pfn;
-};
-
-/************************************************/
/* common paging data structure */
/************************************************/
struct log_dirty_domain {
@@ -208,6 +187,8 @@ struct paging_vcpu {
struct shadow_vcpu shadow;
};
+struct p2m_domain;
+
struct arch_domain
{
l1_pgentry_t *mm_perdomain_pt;
@@ -228,11 +209,12 @@ struct arch_domain
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
+ uint32_t pci_cf8;
struct hvm_domain hvm_domain;
struct paging_domain paging;
- struct p2m_domain p2m ;
+ struct p2m_domain *p2m;
/* Shadow translated domain: P2M mapping */
pagetable_t phys_table;
diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
index 1361806574..0c23c7d949 100644
--- a/xen/include/asm-x86/hvm/domain.h
+++ b/xen/include/asm-x86/hvm/domain.h
@@ -28,6 +28,8 @@
#include <asm/hvm/vioapic.h>
#include <asm/hvm/io.h>
#include <xen/hvm/iommu.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/svm/vmcb.h>
#include <public/hvm/params.h>
#include <public/hvm/save.h>
@@ -60,8 +62,6 @@ struct hvm_domain {
uint64_t params[HVM_NR_PARAMS];
- unsigned long vmx_apic_access_mfn;
-
/* Memory ranges with pinned cache attributes. */
struct list_head pinned_cacheattr_ranges;
@@ -74,11 +74,13 @@ struct hvm_domain {
/* Pass-through */
struct hvm_iommu hvm_iommu;
-#if CONFIG_PAGING_LEVELS == 3
- bool_t amd_npt_4gb_warning;
-#endif
bool_t hap_enabled;
bool_t qemu_mapcache_invalidate;
+
+ union {
+ struct vmx_domain vmx;
+ struct svm_domain svm;
+ };
};
#endif /* __ASM_X86_HVM_DOMAIN_H__ */
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index fe58567263..aee2b150ae 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -138,7 +138,7 @@ void hvm_domain_destroy(struct domain *d);
int hvm_vcpu_initialise(struct vcpu *v);
void hvm_vcpu_destroy(struct vcpu *v);
-void hvm_vcpu_reset(struct vcpu *vcpu);
+void hvm_vcpu_down(struct vcpu *v);
void hvm_send_assist_req(struct vcpu *v);
@@ -224,8 +224,6 @@ hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
hvm_funcs.inject_exception(trapnr, errcode, cr2);
}
-int hvm_bringup_ap(int vcpuid, int trampoline_vector);
-
static inline int hvm_event_pending(struct vcpu *v)
{
return hvm_funcs.event_pending(v);
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 9ef4f645a1..249fd2bcc3 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -30,13 +30,14 @@
#define HVM_PORTIO 0
#define HVM_BUFFERED_IO 2
-typedef unsigned long (*hvm_mmio_read_t)(struct vcpu *v,
- unsigned long addr,
- unsigned long length);
-typedef void (*hvm_mmio_write_t)(struct vcpu *v,
+typedef int (*hvm_mmio_read_t)(struct vcpu *v,
unsigned long addr,
unsigned long length,
- unsigned long val);
+ unsigned long *val);
+typedef int (*hvm_mmio_write_t)(struct vcpu *v,
+ unsigned long addr,
+ unsigned long length,
+ unsigned long val);
typedef int (*hvm_mmio_check_t)(struct vcpu *v, unsigned long addr);
typedef int (*portio_action_t)(
@@ -64,7 +65,7 @@ struct hvm_mmio_handler {
};
int hvm_io_intercept(ioreq_t *p, int type);
-int register_io_handler(
+void register_io_handler(
struct domain *d, unsigned long addr, unsigned long size,
void *action, int type);
@@ -81,18 +82,18 @@ static inline int hvm_buffered_io_intercept(ioreq_t *p)
int hvm_mmio_intercept(ioreq_t *p);
int hvm_buffered_io_send(ioreq_t *p);
-static inline int register_portio_handler(
+static inline void register_portio_handler(
struct domain *d, unsigned long addr,
unsigned long size, portio_action_t action)
{
- return register_io_handler(d, addr, size, action, HVM_PORTIO);
+ register_io_handler(d, addr, size, action, HVM_PORTIO);
}
-static inline int register_buffered_io_handler(
+static inline void register_buffered_io_handler(
struct domain *d, unsigned long addr,
unsigned long size, mmio_action_t action)
{
- return register_io_handler(d, addr, size, action, HVM_BUFFERED_IO);
+ register_io_handler(d, addr, size, action, HVM_BUFFERED_IO);
}
void send_timeoffset_req(unsigned long timeoff);
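
The io.h hunk changes the MMIO handler typedefs so that reads return a status code and hand the value back through a pointer instead of returning it directly. A sketch of the new handler shape, with hypothetical names; the meaning of the status value is defined by the intercept layer and is only assumed here:

/* Sketch (hypothetical handlers): the shape required by the new
 * hvm_mmio_read_t/hvm_mmio_write_t typedefs. */
static int demo_mmio_read(struct vcpu *v, unsigned long addr,
                          unsigned long length, unsigned long *val)
{
    *val = ~0UL;  /* a real handler fills in the device's value here */
    return 1;     /* status code; exact convention is an assumption */
}

static int demo_mmio_write(struct vcpu *v, unsigned long addr,
                           unsigned long length, unsigned long val)
{
    return 1;     /* status code; exact convention is an assumption */
}
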
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 8899f27a73..d64913ce58 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -57,6 +57,7 @@ int amd_iommu_unmap_page(struct domain *d, unsigned long gfn);
void *amd_iommu_get_vptr_from_page_table_entry(u32 *entry);
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
unsigned long phys_addr, unsigned long size, int iw, int ir);
+int amd_iommu_sync_p2m(struct domain *d);
/* device table functions */
void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr,
diff --git a/xen/include/asm-x86/hvm/svm/vmcb.h b/xen/include/asm-x86/hvm/svm/vmcb.h
index 04fd5e0f12..55e98afabd 100644
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -444,6 +444,12 @@ struct vmcb_struct {
u64 res16[301];
} __attribute__ ((packed));
+struct svm_domain {
+#if CONFIG_PAGING_LEVELS == 3
+ bool_t npt_4gb_warning;
+#endif
+};
+
struct arch_svm_struct {
struct vmcb_struct *vmcb;
u64 vmcb_pa;
diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h
index d3281d20dc..c4592d9c1e 100644
--- a/xen/include/asm-x86/hvm/vcpu.h
+++ b/xen/include/asm-x86/hvm/vcpu.h
@@ -26,9 +26,6 @@
#include <asm/hvm/svm/vmcb.h>
#include <asm/mtrr.h>
-#define HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM 0
-#define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1
-
enum hvm_io_state {
HVMIO_none = 0,
HVMIO_dispatched,
@@ -58,9 +55,6 @@ struct hvm_vcpu {
spinlock_t tm_lock;
struct list_head tm_list;
- /* For AP startup */
- unsigned long init_sipi_sipi_state;
-
int xen_port;
bool_t flag_dr_dirty;
diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h
index 163dcdfcc6..cc8a70608d 100644
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -21,6 +21,7 @@
#ifndef __ASM_X86_HVM_VLAPIC_H__
#define __ASM_X86_HVM_VLAPIC_H__
+#include <xen/softirq.h>
#include <asm/msr.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/vpt.h>
@@ -58,6 +59,7 @@ struct vlapic {
struct periodic_time pt;
s_time_t timer_last_update;
struct page_info *regs_page;
+ struct tasklet init_tasklet;
};
static inline uint32_t vlapic_get_reg(struct vlapic *vlapic, uint32_t reg)
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 9ce2d2a38b..4ac9b43e4f 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -53,6 +53,23 @@ struct vmx_msr_state {
unsigned long msrs[VMX_MSR_COUNT];
};
+#define EPT_DEFAULT_MT 6
+#define EPT_DEFAULT_GAW 3
+
+struct vmx_domain {
+ unsigned long apic_access_mfn;
+ unsigned long vpid_base;
+ union {
+ struct {
+ u64 etmt :3,
+ gaw :3,
+ rsvd :6,
+ asr :52;
+ };
+ u64 eptp;
+ } ept_control;
+};
+
struct arch_vmx_struct {
/* Virtual address of VMCS. */
struct vmcs_struct *vmcs;
@@ -71,6 +88,9 @@ struct arch_vmx_struct {
/* Cache of cpu execution control. */
u32 exec_control;
+ u32 secondary_exec_control;
+
+ u16 vpid;
/* PMU */
struct vpmu_struct vpmu;
@@ -108,6 +128,8 @@ void vmx_vmcs_exit(struct vcpu *v);
#define CPU_BASED_MWAIT_EXITING 0x00000400
#define CPU_BASED_RDPMC_EXITING 0x00000800
#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
#define CPU_BASED_TPR_SHADOW 0x00200000
@@ -136,6 +158,8 @@ extern u32 vmx_vmexit_control;
extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
extern u32 vmx_secondary_exec_control;
@@ -151,6 +175,12 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
(vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
#define cpu_has_vmx_msr_bitmap \
(vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
+#define cpu_has_vmx_secondary_exec_control \
+ (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
+#define cpu_has_vmx_ept \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
+#define cpu_has_vmx_vpid \
+ (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
@@ -160,6 +190,7 @@ extern bool_t cpu_has_vmx_ins_outs_instr_info;
/* VMCS field encodings. */
enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
@@ -192,11 +223,23 @@ enum vmcs_field {
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
APIC_ACCESS_ADDR = 0x00002014,
- APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
@@ -287,6 +330,8 @@ enum vmcs_field {
HOST_RIP = 0x00006c16,
};
+#define VMCS_VPID_WIDTH 16
+
void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr);
int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
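
struct vmx_domain's ept_control union lets the EPT pointer be assembled field by field and read back as one 64-bit value for the new EPT_POINTER VMCS field. A hedged sketch, assuming 'asr_mfn' is the machine frame number of the top-level EPT table supplied by the caller:

/* Sketch: build an EPT pointer from the bitfields defined above.  Field
 * meanings follow the struct vmx_domain definition; loading the result
 * into EPT_POINTER is left to the VMCS setup code. */
static u64 demo_make_eptp(struct domain *d, unsigned long asr_mfn)
{
    struct vmx_domain *vd = &d->arch.hvm_domain.vmx;

    vd->ept_control.etmt = EPT_DEFAULT_MT;   /* default EPT memory type */
    vd->ept_control.gaw  = EPT_DEFAULT_GAW;  /* default guest address width */
    vd->ept_control.asr  = asr_mfn;          /* frame of the top-level table */

    return vd->ept_control.eptp;
}
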
diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h
index 1d2f37d4d1..c94549caec 100644
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -23,9 +23,27 @@
#include <asm/types.h>
#include <asm/regs.h>
#include <asm/processor.h>
-#include <asm/hvm/vmx/vmcs.h>
#include <asm/i387.h>
+#include <asm/hvm/support.h>
#include <asm/hvm/trace.h>
+#include <asm/hvm/vmx/vmcs.h>
+
+typedef union {
+ struct {
+ u64 r : 1,
+ w : 1,
+ x : 1,
+ emt : 4,
+ sp_avail : 1,
+ avail1 : 4,
+ mfn : 45,
+ rsvd : 5,
+ avail2 : 2;
+ };
+ u64 epte;
+} ept_entry_t;
+
+#define EPT_TABLE_ORDER 9
void vmx_asm_vmexit_handler(struct cpu_user_regs);
void vmx_asm_do_vmentry(void);
@@ -80,6 +98,8 @@ void vmx_realmode(struct cpu_user_regs *regs);
#define EXIT_REASON_MACHINE_CHECK 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
/*
@@ -143,12 +163,15 @@ void vmx_realmode(struct cpu_user_regs *regs);
#define VMREAD_OPCODE ".byte 0x0f,0x78\n"
#define VMRESUME_OPCODE ".byte 0x0f,0x01,0xc3\n"
#define VMWRITE_OPCODE ".byte 0x0f,0x79\n"
+#define INVEPT_OPCODE ".byte 0x66,0x0f,0x38,0x80\n" /* m128,r64/32 */
+#define INVVPID_OPCODE ".byte 0x66,0x0f,0x38,0x81\n" /* m128,r64/32 */
#define VMXOFF_OPCODE ".byte 0x0f,0x01,0xc4\n"
#define VMXON_OPCODE ".byte 0xf3,0x0f,0xc7\n"
+#define MODRM_EAX_08 ".byte 0x08\n" /* ECX, [EAX] */
#define MODRM_EAX_06 ".byte 0x30\n" /* [EAX], with reg/opcode: /6 */
#define MODRM_EAX_07 ".byte 0x38\n" /* [EAX], with reg/opcode: /7 */
-#define MODRM_EAX_ECX ".byte 0xc1\n" /* [EAX], [ECX] */
+#define MODRM_EAX_ECX ".byte 0xc1\n" /* EAX, ECX */
static inline void __vmptrld(u64 addr)
{
@@ -232,6 +255,60 @@ static inline void __vm_clear_bit(unsigned long field, unsigned int bit)
__vmwrite(field, __vmread(field) & ~(1UL << bit));
}
+static inline void __invept(int ext, u64 eptp, u64 gpa)
+{
+ struct {
+ u64 eptp, gpa;
+ } operand = {eptp, gpa};
+
+ asm volatile ( INVEPT_OPCODE
+ MODRM_EAX_08
+ /* CF==1 or ZF==1 --> rc = -1 */
+ "ja 1f ; ud2 ; 1:\n"
+ :
+ : "a" (&operand), "c" (ext)
+ : "memory" );
+}
+
+static inline void __invvpid(int ext, u16 vpid, u64 gva)
+{
+ struct {
+ u64 vpid:16;
+ u64 rsvd:48;
+ u64 gva;
+ } __attribute__ ((packed)) operand = {vpid, 0, gva};
+
+ asm volatile ( INVVPID_OPCODE
+ MODRM_EAX_08
+ /* CF==1 or ZF==1 --> rc = -1 */
+ "ja 1f ; ud2 ; 1:\n"
+ :
+ : "a" (&operand), "c" (ext)
+ : "memory" );
+}
+
+static inline void ept_sync_all(void)
+{
+ if ( !current->domain->arch.hvm_domain.hap_enabled )
+ return;
+
+ __invept(2, 0, 0);
+}
+
+void ept_sync_domain(struct domain *d);
+
+static inline void vpid_sync_vcpu_all(struct vcpu *v)
+{
+ if ( cpu_has_vmx_vpid )
+ __invvpid(1, v->arch.hvm_vmx.vpid, 0);
+}
+
+static inline void vpid_sync_all(void)
+{
+ if ( cpu_has_vmx_vpid )
+ __invvpid(2, 0, 0);
+}
+
static inline void __vmxoff(void)
{
asm volatile (
@@ -265,4 +342,49 @@ void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code);
void vmx_inject_extint(struct vcpu *v, int trap);
void vmx_inject_nmi(struct vcpu *v);
+void ept_p2m_init(struct domain *d);
+
+/* EPT violation qualifications definitions */
+/* bit offset 0 in exit qualification */
+#define _EPT_READ_VIOLATION 0
+#define EPT_READ_VIOLATION (1UL<<_EPT_READ_VIOLATION)
+/* bit offset 1 in exit qualification */
+#define _EPT_WRITE_VIOLATION 1
+#define EPT_WRITE_VIOLATION (1UL<<_EPT_WRITE_VIOLATION)
+/* bit offset 2 in exit qualification */
+#define _EPT_EXEC_VIOLATION 2
+#define EPT_EXEC_VIOLATION (1UL<<_EPT_EXEC_VIOLATION)
+
+/* bit offset 3 in exit qualification */
+#define _EPT_EFFECTIVE_READ 3
+#define EPT_EFFECTIVE_READ (1UL<<_EPT_EFFECTIVE_READ)
+/* bit offset 4 in exit qualification */
+#define _EPT_EFFECTIVE_WRITE 4
+#define EPT_EFFECTIVE_WRITE (1UL<<_EPT_EFFECTIVE_WRITE)
+/* bit offset 5 in exit qualification */
+#define _EPT_EFFECTIVE_EXEC 5
+#define EPT_EFFECTIVE_EXEC (1UL<<_EPT_EFFECTIVE_EXEC)
+
+/* bit offset 6 in exit qualification */
+#define _EPT_GAW_VIOLATION 6
+#define EPT_GAW_VIOLATION (1UL<<_EPT_GAW_VIOLATION)
+
+/* bits offset 7 & 8 in exit qualification */
+#define _EPT_GLA_VALIDITY 7
+#define EPT_GLA_VALIDITY_MASK (3UL<<_EPT_GLA_VALIDITY)
+/* gla != gpa, when load PDPTR */
+#define EPT_GLA_VALIDITY_PDPTR_LOAD (0UL<<_EPT_GLA_VALIDITY)
+/* gla != gpa, during guest page table walking */
+#define EPT_GLA_VALIDITY_GPT_WALK (1UL<<_EPT_GLA_VALIDITY)
+/* reserved */
+#define EPT_GLA_VALIDITY_RSVD (2UL<<_EPT_GLA_VALIDITY)
+/* gla == gpa, normal case */
+#define EPT_GLA_VALIDITY_MATCH (3UL<<_EPT_GLA_VALIDITY)
+
+#define EPT_EFFECTIVE_MASK (EPT_EFFECTIVE_READ | \
+ EPT_EFFECTIVE_WRITE | \
+ EPT_EFFECTIVE_EXEC)
+
+#define EPT_PAGETABLE_ENTRIES 512
+
#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
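
The EPT violation qualification macros above are bit tests against the exit qualification reported with EXIT_REASON_EPT_VIOLATION. A small sketch of how a handler might classify a fault, assuming 'qualification' was already read from the VMCS by the caller:

/* Sketch: classify an EPT violation using the qualification bits above. */
static void demo_classify_ept_violation(unsigned long qualification)
{
    int is_write = !!(qualification & EPT_WRITE_VIOLATION);
    int gla_matches_gpa =
        (qualification & EPT_GLA_VALIDITY_MASK) == EPT_GLA_VALIDITY_MATCH;

    gdprintk(XENLOG_INFO, "EPT violation: %s access, gla %s gpa\n",
             is_write ? "write" : "read",
             gla_matches_gpa ? "==" : "!=");
}
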
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 7826db4feb..f4fdad1327 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -69,8 +69,7 @@ struct page_info
#define PGT_l2_page_table (2U<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table (3U<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table (4U<<29) /* using this page as an L4 page table? */
-#define PGT_gdt_page (5U<<29) /* using this page in a GDT? */
-#define PGT_ldt_page (6U<<29) /* using this page in an LDT? */
+#define PGT_seg_desc_page (5U<<29) /* using this page in a GDT/LDT? */
#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
#define PGT_type_mask (7U<<29) /* Bits 29-31. */
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index 90233e07b9..cbf91e8621 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -73,6 +73,5 @@ static inline __attribute__((pure)) int phys_to_nid(paddr_t addr)
#define clear_node_cpumask(cpu) do {} while (0)
#endif
-#define NUMA_NO_NODE 0xff
#endif
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index 38a9cec7bd..cb5c882b03 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -26,6 +26,8 @@
#ifndef _XEN_P2M_H
#define _XEN_P2M_H
+#include <xen/config.h>
+#include <xen/paging.h>
/*
* The phys_to_machine_mapping maps guest physical frame numbers
@@ -86,54 +88,52 @@ typedef enum {
#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+struct p2m_domain {
+ /* Lock that protects updates to the p2m */
+ spinlock_t lock;
+ int locker; /* processor which holds the lock */
+ const char *locker_function; /* Func that took it */
+
+ /* Pages used to construct the p2m */
+ struct list_head pages;
+
+ /* Functions to call to get or free pages for the p2m */
+ struct page_info * (*alloc_page )(struct domain *d);
+ void (*free_page )(struct domain *d,
+ struct page_info *pg);
+ int (*set_entry )(struct domain *d, unsigned long gfn,
+ mfn_t mfn, p2m_type_t p2mt);
+ mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
+ p2m_type_t *p2mt);
+ mfn_t (*get_entry_current)(unsigned long gfn,
+ p2m_type_t *p2mt);
+ void (*change_entry_type_global)(struct domain *d,
+ p2m_type_t ot,
+ p2m_type_t nt);
+
+ /* Highest guest frame that's ever been mapped in the p2m */
+ unsigned long max_mapped_pfn;
+};
+
/* Extract the type from the PTE flags that store it */
static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
{
/* Type is stored in the "available" bits, 9, 10 and 11 */
return (flags >> 9) & 0x7;
}
-
-/* Read the current domain's p2m table (through the linear mapping). */
+
+/* Read the current domain's p2m table. */
static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
{
- mfn_t mfn = _mfn(INVALID_MFN);
- p2m_type_t p2mt = p2m_mmio_dm;
- /* XXX This is for compatibility with the old model, where anything not
- * XXX marked as RAM was considered to be emulated MMIO space.
- * XXX Once we start explicitly registering MMIO regions in the p2m
- * XXX we will return p2m_invalid for unmapped gfns */
-
- if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
- {
- l1_pgentry_t l1e = l1e_empty();
- int ret;
-
- ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
- / sizeof(l1_pgentry_t));
-
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
-
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
- if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
- p2mt = p2m_mmio_dm;
- }
- }
-
- *t = p2mt;
- return mfn;
+ return current->domain->arch.p2m->get_entry_current(gfn, t);
}
/* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
+static inline
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
+{
+ return d->arch.p2m->get_entry(d, gfn, t);
+}
/* General conversion function from gfn to mfn */
#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
@@ -149,7 +149,7 @@ static inline mfn_t _gfn_to_mfn(struct domain *d,
}
if ( likely(current->domain == d) )
return gfn_to_mfn_current(gfn, t);
- else
+ else
return gfn_to_mfn_foreign(d, gfn, t);
}
@@ -185,7 +185,7 @@ gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
/* Init the datastructures for later use by the p2m code */
-void p2m_init(struct domain *d);
+int p2m_init(struct domain *d);
/* Allocate a new p2m table for a domain.
*
@@ -199,6 +199,7 @@ int p2m_alloc_table(struct domain *d,
/* Return all the p2m resources to Xen. */
void p2m_teardown(struct domain *d);
+void p2m_final_teardown(struct domain *d);
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
@@ -220,6 +221,7 @@ void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
/* Change types across all p2m entries in a domain */
void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+void p2m_change_entry_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
/* Compare-exchange the type of a single p2m entry */
p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
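
With the p2m operations now reached through struct p2m_domain, ordinary lookups keep using gfn_to_mfn()/gfn_to_mfn_current() and always get a p2m type back alongside the frame. A minimal sketch of a caller:

/* Sketch: translate a guest frame and inspect the returned p2m type. */
static void demo_p2m_lookup(struct domain *d, unsigned long gfn)
{
    p2m_type_t t;
    mfn_t mfn = gfn_to_mfn(d, gfn, &t);

    if ( p2m_is_ram(t) )
        gdprintk(XENLOG_INFO, "gfn %lx -> mfn %lx (RAM)\n", gfn, mfn_x(mfn));
    else if ( !p2m_is_valid(t) )
        gdprintk(XENLOG_INFO, "gfn %lx not mapped\n", gfn);
}
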
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 4c7e6f1327..f7512a7b26 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -183,7 +183,7 @@ void paging_vcpu_init(struct vcpu *v);
/* Set up the paging-assistance-specific parts of a domain struct at
* start of day. Called for every domain from arch_domain_create() */
-void paging_domain_init(struct domain *d);
+int paging_domain_init(struct domain *d);
/* Handler for paging-control ops: operations from user-space to enable
* and disable ephemeral shadow modes (test mode and log-dirty mode) and
diff --git a/xen/include/public/hvm/params.h b/xen/include/public/hvm/params.h
index aad2196db3..f222cbac17 100644
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -84,6 +84,12 @@
/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
#define HVM_PARAM_HPET_ENABLED 11
-#define HVM_NR_PARAMS 12
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT 12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN 13
+
+#define HVM_NR_PARAMS 14
#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
diff --git a/xen/include/xen/hvm/iommu.h b/xen/include/xen/hvm/iommu.h
index 8b11d44c16..1577ad4255 100644
--- a/xen/include/xen/hvm/iommu.h
+++ b/xen/include/xen/hvm/iommu.h
@@ -38,7 +38,7 @@ struct g2m_ioport {
struct hvm_iommu {
spinlock_t iommu_list_lock; /* protect iommu specific lists */
struct list_head pdev_list; /* direct accessed pci devices */
- struct dma_pte *pgd; /* io page directory root */
+ u64 pgd_maddr; /* io page directory machine address */
spinlock_t mapping_lock; /* io page table lock */
int agaw; /* adjusted guest address width, 0 is level 2 30-bit */
struct list_head g2m_ioport_list; /* guest to machine ioport mapping */
@@ -48,9 +48,10 @@ struct hvm_iommu {
int domain_id;
int paging_mode;
void *root_table;
+ bool_t p2m_synchronized;
/* iommu_ops */
struct iommu_ops *platform_ops;
};
-#endif // __ASM_X86_HVM_IOMMU_H__
+#endif /* __ASM_X86_HVM_IOMMU_H__ */
diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h
index 5313b9a1d8..7d58109ec2 100644
--- a/xen/include/xen/hypercall.h
+++ b/xen/include/xen/hypercall.h
@@ -30,6 +30,7 @@ do_sched_op(
int cmd,
XEN_GUEST_HANDLE(void) arg);
+extern spinlock_t domctl_lock;
extern long
do_domctl(
XEN_GUEST_HANDLE(xen_domctl_t) u_domctl);
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 362bffd155..bde167694e 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -67,7 +67,7 @@ struct iommu {
u64 ecap;
spinlock_t lock; /* protect context, domain ids */
spinlock_t register_lock; /* protect iommu register handling */
- struct root_entry *root_entry; /* virtual address */
+ u64 root_maddr; /* root entry machine address */
unsigned int vector;
struct intel_iommu *intel;
};
@@ -85,6 +85,7 @@ int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
int iommu_unmap_page(struct domain *d, unsigned long gfn);
void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
void iommu_set_pgd(struct domain *d);
+void iommu_free_pgd(struct domain *d);
void iommu_domain_teardown(struct domain *d);
int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
int dpci_ioport_intercept(ioreq_t *p);
@@ -98,6 +99,9 @@ void io_apic_write_remap_rte(unsigned int apic,
struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu);
struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu);
struct iommu_flush *iommu_get_flush(struct iommu *iommu);
+void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq);
+struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain);
+int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci);
#define PT_IRQ_TIME_OUT MILLISECS(8)
#define VTDPREFIX "[VT-D]"
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 2742563fd8..1341bb0fa0 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -54,14 +54,11 @@ void free_xenheap_pages(void *v, unsigned int order);
void init_domheap_pages(paddr_t ps, paddr_t pe);
struct page_info *alloc_domheap_pages(
struct domain *d, unsigned int order, unsigned int memflags);
-struct page_info *__alloc_domheap_pages(
- struct domain *d, unsigned int cpu, unsigned int order,
- unsigned int memflags);
void free_domheap_pages(struct page_info *pg, unsigned int order);
unsigned long avail_domheap_pages_region(
unsigned int node, unsigned int min_width, unsigned int max_width);
unsigned long avail_domheap_pages(void);
-#define alloc_domheap_page(d) (alloc_domheap_pages(d,0,0))
+#define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f))
#define free_domheap_page(p) (free_domheap_pages(p,0))
void scrub_heap_pages(void);
@@ -75,6 +72,8 @@ int assign_pages(
/* memflags: */
#define _MEMF_no_refcount 0
#define MEMF_no_refcount (1U<<_MEMF_no_refcount)
+#define _MEMF_node 8
+#define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node)
#define _MEMF_bits 24
#define MEMF_bits(n) ((n)<<_MEMF_bits)
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 9585fc9c48..80aa3586be 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -8,6 +8,13 @@
#define NODES_SHIFT 0
#endif
+#define NUMA_NO_NODE 0xFF
+
#define MAX_NUMNODES (1 << NODES_SHIFT)
+#define vcpu_to_node(v) (cpu_to_node((v)->processor))
+
+#define domain_to_node(d) \
+ (((d)->vcpu[0] != NULL) ? vcpu_to_node((d)->vcpu[0]) : NUMA_NO_NODE)
+
#endif /* _XEN_NUMA_H */
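
alloc_domheap_page() now takes a memflags argument, and the new MEMF_node()/domain_to_node() helpers let callers express a NUMA placement preference (MEMF_node() stores node+1, so a zero field still means no preference). A short sketch of both call styles:

/* Sketch: anonymous allocation versus a domain-owned, node-targeted one. */
static struct page_info *demo_alloc_on_node(struct domain *d)
{
    /* Anonymous page: no owner, no placement preference. */
    struct page_info *anon = alloc_domheap_page(NULL, 0);

    if ( anon != NULL )
        free_domheap_page(anon);

    /* Domain-owned page, steered towards the domain's NUMA node. */
    return alloc_domheap_page(d, MEMF_node(domain_to_node(d)));
}
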
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
new file mode 100644
index 0000000000..2e4d78357c
--- /dev/null
+++ b/xen/include/xen/pci.h
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * pci.h
+ *
+ * PCI access functions.
+ */
+
+#ifndef __XEN_PCI_H__
+#define __XEN_PCI_H__
+
+#include <xen/config.h>
+#include <xen/types.h>
+
+uint8_t pci_conf_read8(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
+uint16_t pci_conf_read16(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
+uint32_t pci_conf_read32(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
+void pci_conf_write8(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint8_t data);
+void pci_conf_write16(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint16_t data);
+void pci_conf_write32(
+ unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg,
+ uint32_t data);
+
+#endif /* __XEN_PCI_H__ */
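
The new xen/pci.h accessors are what the pdev_flr() and capability-walk hunks at the top of this diff were converted to use. A minimal sketch that probes one function with them; the 0x00/0x02 offsets are the standard vendor/device ID locations and are assumed here rather than taken from a header:

#include <xen/types.h>
#include <xen/lib.h>
#include <xen/pci.h>

/* Sketch: read the vendor/device IDs at bus:dev.func with the new helpers. */
static int demo_pci_probe(unsigned int bus, unsigned int dev, unsigned int func)
{
    uint16_t vendor = pci_conf_read16(bus, dev, func, 0x00);
    uint16_t device = pci_conf_read16(bus, dev, func, 0x02);

    if ( vendor == 0xffff )
        return 0;                 /* no device responded at this BDF */

    printk("PCI %02x:%02x.%u %04x:%04x\n", bus, dev, func, vendor, device);
    return 1;
}
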
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 777195e448..ff5241a532 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -25,8 +25,6 @@
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif
-extern unsigned long volatile jiffies;
-
/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;
@@ -140,8 +138,9 @@ struct vcpu
};
/* Per-domain lock can be recursively acquired in fault handlers. */
-#define LOCK_BIGLOCK(_d) spin_lock_recursive(&(_d)->big_lock)
-#define UNLOCK_BIGLOCK(_d) spin_unlock_recursive(&(_d)->big_lock)
+#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
+#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)
+#define domain_is_locked(d) spin_is_locked(&(d)->domain_lock)
struct domain
{
@@ -149,7 +148,7 @@ struct domain
shared_info_t *shared_info; /* shared data area */
- spinlock_t big_lock;
+ spinlock_t domain_lock;
spinlock_t page_alloc_lock; /* protects all the following fields */
struct list_head page_list; /* linked list, of size tot_pages */
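
The big-lock macros are renamed to domain_lock()/domain_unlock() over the (still recursive) domain_lock field; callers keep the same shape:

/* Sketch: a critical section under the renamed per-domain lock. */
static void demo_with_domain_locked(struct domain *d)
{
    domain_lock(d);
    /* ... operate on state guarded by the per-domain lock ... */
    domain_unlock(d);
}
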
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index 7734b3374d..0b59f63b22 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -1,24 +1,17 @@
-#ifndef __XEN_SOFTIRQ_H__
+#if !defined(__XEN_SOFTIRQ_H__) && !defined(__ASSEMBLY__)
#define __XEN_SOFTIRQ_H__
-/* Common softirqs come first in the following list. */
-#define TIMER_SOFTIRQ 0
-#define SCHEDULE_SOFTIRQ 1
-#define NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ 2
-#define KEYPRESS_SOFTIRQ 3
-#define NMI_SOFTIRQ 4
-#define PAGE_SCRUB_SOFTIRQ 5
-#define TRACE_SOFTIRQ 6
-#define RCU_SOFTIRQ 7
-#define STOPMACHINE_SOFTIRQ 8
-
-#define NR_COMMON_SOFTIRQS 9
-
-#include <asm/softirq.h>
-
-#define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
-
-#ifndef __ASSEMBLY__
+/* Low-latency softirqs come first in the following list. */
+enum {
+ TIMER_SOFTIRQ = 0,
+ SCHEDULE_SOFTIRQ,
+ NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
+ PAGE_SCRUB_SOFTIRQ,
+ RCU_SOFTIRQ,
+ STOPMACHINE_SOFTIRQ,
+ TASKLET_SOFTIRQ,
+ NR_COMMON_SOFTIRQS
+};
#include <xen/config.h>
#include <xen/lib.h>
@@ -26,11 +19,15 @@
#include <asm/bitops.h>
#include <asm/current.h>
#include <asm/hardirq.h>
+#include <asm/softirq.h>
+
+#define NR_SOFTIRQS (NR_COMMON_SOFTIRQS + NR_ARCH_SOFTIRQS)
typedef void (*softirq_handler)(void);
asmlinkage void do_softirq(void);
-extern void open_softirq(int nr, softirq_handler handler);
+void open_softirq(int nr, softirq_handler handler);
+void softirq_init(void);
static inline void cpumask_raise_softirq(cpumask_t mask, unsigned int nr)
{
@@ -56,6 +53,26 @@ static inline void raise_softirq(unsigned int nr)
set_bit(nr, &softirq_pending(smp_processor_id()));
}
-#endif /* __ASSEMBLY__ */
+/*
+ * TASKLETS -- dynamically-allocatable tasks run in softirq context
+ * on at most one CPU at a time.
+ */
+struct tasklet
+{
+ struct list_head list;
+ bool_t is_scheduled;
+ bool_t is_running;
+ bool_t is_dead;
+ void (*func)(unsigned long);
+ unsigned long data;
+};
+
+#define DECLARE_TASKLET(name, func, data) \
+ struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
+
+void tasklet_schedule(struct tasklet *t);
+void tasklet_kill(struct tasklet *t);
+void tasklet_init(
+ struct tasklet *t, void (*func)(unsigned long), unsigned long data);
#endif /* __XEN_SOFTIRQ_H__ */
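
Tasklets now live in Xen's own softirq.h (the Linux-derived ones in interrupt.h are compiled out under XEN, per the hunk earlier in this diff) and are driven by the new TASKLET_SOFTIRQ; the vlapic's init_tasklet is one consumer. A sketch of the API with placeholder names, assuming it is added to a file that already pulls in xen/softirq.h:

/* Sketch: declare, schedule and tear down a tasklet with the new API. */
static void demo_work(unsigned long data)
{
    printk("tasklet ran, data = %lx\n", data);
}

static DECLARE_TASKLET(demo_tasklet, demo_work, 0);

static void demo_kick(void)
{
    tasklet_schedule(&demo_tasklet);   /* runs later in softirq context */
}

static void demo_teardown(void)
{
    tasklet_kill(&demo_tasklet);       /* stop it and wait if it is running */
}
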
diff --git a/xen/include/xen/xencomm.h b/xen/include/xen/xencomm.h
index 9b46c89dbf..f044c74f99 100644
--- a/xen/include/xen/xencomm.h
+++ b/xen/include/xen/xencomm.h
@@ -114,4 +114,12 @@ static inline unsigned long xencomm_inline_addr(const void *handle)
xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \
})
+#ifdef CONFIG_XENCOMM_MARK_DIRTY
+extern void xencomm_mark_dirty(unsigned long addr, unsigned int len);
+#else
+static inline void xencomm_mark_dirty(unsigned long addr, unsigned int len)
+{
+}
+#endif
+
#endif /* __XENCOMM_H__ */
diff --git a/xen/xsm/acm/acm_chinesewall_hooks.c b/xen/xsm/acm/acm_chinesewall_hooks.c
index 65e60e7cb4..977c45ff2a 100644
--- a/xen/xsm/acm/acm_chinesewall_hooks.c
+++ b/xen/xsm/acm/acm_chinesewall_hooks.c
@@ -637,8 +637,12 @@ static void chwall_domain_destroy(void *object_ssid, struct domain *d)
static int chwall_is_default_policy(void)
{
- return ( (chwall_bin_pol.max_types == 1 ) &&
- (chwall_bin_pol.max_ssidrefs == 2 ) );
+ static const domaintype_t def_policy[2] = { 0x0, 0x0 };
+ return ( ( chwall_bin_pol.max_types == 1 ) &&
+ ( chwall_bin_pol.max_ssidrefs == 2 ) &&
+ ( memcmp(chwall_bin_pol.ssidrefs,
+ def_policy,
+ sizeof(def_policy)) == 0 ) );
}
diff --git a/xen/xsm/acm/acm_simple_type_enforcement_hooks.c b/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
index 01eae51bb2..2810597c39 100644
--- a/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
+++ b/xen/xsm/acm/acm_simple_type_enforcement_hooks.c
@@ -108,7 +108,7 @@ static int share_common_type(struct domain *subj, struct domain *obj)
int acm_init_ste_policy(void)
{
/* minimal startup policy; policy write-locked already */
- ste_bin_pol.max_types = 1;
+ ste_bin_pol.max_types = 2;
ste_bin_pol.max_ssidrefs = 1 + dom0_ste_ssidref;
ste_bin_pol.ssidrefs =
(domaintype_t *)xmalloc_array(domaintype_t,
@@ -123,7 +123,9 @@ int acm_init_ste_policy(void)
ste_bin_pol.max_ssidrefs);
/* initialize state so that dom0 can start up and communicate with itself */
+ ste_bin_pol.ssidrefs[ste_bin_pol.max_types - 1 ] = 1;
ste_bin_pol.ssidrefs[ste_bin_pol.max_types * dom0_ste_ssidref] = 1;
+ ste_bin_pol.ssidrefs[ste_bin_pol.max_types * dom0_ste_ssidref + 1] = 1;
/* init stats */
atomic_set(&(ste_bin_pol.ec_eval_count), 0);
@@ -868,8 +870,12 @@ ste_authorization(ssidref_t ssidref1, ssidref_t ssidref2)
static int
ste_is_default_policy(void)
{
- return ((ste_bin_pol.max_types == 1) &&
- (ste_bin_pol.max_ssidrefs == 2));
+ const static domaintype_t def_policy[4] = { 0x0, 0x1, 0x1, 0x1};
+ return ((ste_bin_pol.max_types == 2) &&
+ (ste_bin_pol.max_ssidrefs == 2) &&
+ (memcmp(ste_bin_pol.ssidrefs,
+ def_policy,
+ sizeof(def_policy)) == 0));
}
/* now define the hook structure similarly to LSM */