aboutsummaryrefslogtreecommitdiffstats
path: root/xen
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2013-01-23 14:06:20 +0100
committerJan Beulich <jbeulich@suse.com>2013-01-23 14:06:20 +0100
commit4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328 (patch)
tree43489d3149200ddb5e03ad01309023727326ff30 /xen
parenta8d2b06db7826063df9d04be9d6f928bf2189bd0 (diff)
downloadxen-4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328.tar.gz
xen-4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328.tar.bz2
xen-4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328.zip
x86: re-introduce map_domain_page() et al
This is being done mostly in the form previously used on x86-32, utilizing the second L3 page table slot within the per-domain mapping area for those mappings. It remains to be determined whether that concept is really suitable, or whether instead re-implementing at least the non-global variant from scratch would be better. Also add the helpers {clear,copy}_domain_page() as well as initial uses of them. One question is whether, to exercise the non-trivial code paths, we shouldn't make the trivial shortcuts conditional upon NDEBUG being defined. See the debugging patch at the end of the series. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Keir Fraser <keir@xen.org>
Diffstat (limited to 'xen')
-rw-r--r--xen/arch/x86/Makefile1
-rw-r--r--xen/arch/x86/domain.c25
-rw-r--r--xen/arch/x86/domain_page.c471
-rw-r--r--xen/arch/x86/mm.c16
-rw-r--r--xen/include/asm-x86/config.h37
-rw-r--r--xen/include/asm-x86/domain.h28
-rw-r--r--xen/include/xen/domain_page.h10
7 files changed, 544 insertions, 44 deletions
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 9d5a254eea..0fe547b313 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -19,6 +19,7 @@ obj-bin-y += dmi_scan.init.o
obj-y += domctl.o
obj-y += domain.o
obj-bin-y += domain_build.init.o
+obj-y += domain_page.o
obj-y += e820.o
obj-y += extable.o
obj-y += flushtlb.o
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index a1e16887ef..9582843000 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -397,10 +397,14 @@ int vcpu_initialise(struct vcpu *v)
return -ENOMEM;
clear_page(page_to_virt(pg));
perdomain_pt_page(d, idx) = pg;
- d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+idx]
+ d->arch.mm_perdomain_l2[0][l2_table_offset(PERDOMAIN_VIRT_START)+idx]
= l2e_from_page(pg, __PAGE_HYPERVISOR);
}
+ rc = mapcache_vcpu_init(v);
+ if ( rc )
+ return rc;
+
paging_vcpu_init(v);
v->arch.perdomain_ptes = perdomain_ptes(d, v);
@@ -526,8 +530,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
if ( pg == NULL )
goto fail;
- d->arch.mm_perdomain_l2 = page_to_virt(pg);
- clear_page(d->arch.mm_perdomain_l2);
+ d->arch.mm_perdomain_l2[0] = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l2[0]);
pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
if ( pg == NULL )
@@ -535,8 +539,10 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
d->arch.mm_perdomain_l3 = page_to_virt(pg);
clear_page(d->arch.mm_perdomain_l3);
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
- l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
- __PAGE_HYPERVISOR);
+ l3e_from_pfn(virt_to_mfn(d->arch.mm_perdomain_l2[0]),
+ __PAGE_HYPERVISOR);
+
+ mapcache_domain_init(d);
HYPERVISOR_COMPAT_VIRT_START(d) =
is_hvm_domain(d) ? ~0u : __HYPERVISOR_COMPAT_VIRT_START;
@@ -609,8 +615,9 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags)
free_xenheap_page(d->shared_info);
if ( paging_initialised )
paging_final_teardown(d);
- if ( d->arch.mm_perdomain_l2 )
- free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
+ mapcache_domain_exit(d);
+ if ( d->arch.mm_perdomain_l2[0] )
+ free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
if ( d->arch.mm_perdomain_l3 )
free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
if ( d->arch.mm_perdomain_pt_pages )
@@ -633,13 +640,15 @@ void arch_domain_destroy(struct domain *d)
paging_final_teardown(d);
+ mapcache_domain_exit(d);
+
for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
{
if ( perdomain_pt_page(d, i) )
free_domheap_page(perdomain_pt_page(d, i));
}
free_domheap_page(virt_to_page(d->arch.mm_perdomain_pt_pages));
- free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2));
+ free_domheap_page(virt_to_page(d->arch.mm_perdomain_l2[0]));
free_domheap_page(virt_to_page(d->arch.mm_perdomain_l3));
free_xenheap_page(d->shared_info);
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
new file mode 100644
index 0000000000..6a219947d5
--- /dev/null
+++ b/xen/arch/x86/domain_page.c
@@ -0,0 +1,471 @@
+/******************************************************************************
+ * domain_page.c
+ *
+ * Allow temporary mapping of domain pages.
+ *
+ * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
+ */
+
+#include <xen/domain_page.h>
+#include <xen/mm.h>
+#include <xen/perfc.h>
+#include <xen/pfn.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/hardirq.h>
+
+/*
+ * Pick the vCPU whose mapcache the calling CPU has to use.
+ *
+ * Returns NULL while 'current' still holds the 0xfffff000 placeholder
+ * (callers must check for that), this CPU's idle vCPU when a non-HVM vCPU
+ * has a null guest_table, and 'current' in all other cases.
+ */
+static inline struct vcpu *mapcache_current_vcpu(void)
+{
+    struct vcpu *v = current;
+
+    /*
+     * 'current' is not properly set up yet: this is treated like running
+     * in an idle vCPU.
+     */
+    if ( v == (struct vcpu *)0xfffff000 )
+        return NULL;
+
+    /* Common case: the running vCPU's own mapcache is the one to use. */
+    if ( !unlikely(pagetable_is_null(v->arch.guest_table)) || is_hvm_vcpu(v) )
+        return v;
+
+    /*
+     * A paravirtualised vCPU with a null guest_table is running on the idle
+     * domain's page table, so the idle vCPU's mapcache must be used.
+     */
+    v = idle_vcpu[smp_processor_id()];
+
+    /* If we really are idling, perform the lazy context switch now. */
+    if ( v == current )
+        sync_local_execstate();
+
+    /* From here on the idle page table has to be the active one. */
+    ASSERT(read_cr3() == __pa(idle_pg_table));
+
+    return v;
+}
+
+/* Index into l1tab[] of the L1 page table that holds mapcache entry e. */
+#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
+/* Number of L1 page tables needed to cover all MAPCACHE_ENTRIES entries. */
+#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
+/*
+ * The L1 page table entry for mapcache slot idx of domain cache dc: the
+ * upper bits of idx select the L1 table, the low PAGETABLE_ORDER bits the
+ * entry within it.
+ */
+#define DCACHE_L1ENT(dc, idx) \
+ ((dc)->l1tab[(idx) >> PAGETABLE_ORDER] \
+ [(idx) & ((1 << PAGETABLE_ORDER) - 1)])
+
+/*
+ * Map page frame mfn for use by the calling CPU and return the virtual
+ * address of the mapping.  Pair every call with unmap_domain_page().
+ *
+ * Frames at or below PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)), calls made
+ * without a usable PV vCPU, and domains without a mapcache (l1tab == NULL)
+ * are served through mfn_to_virt().  Everything else gets a slot in the
+ * per-domain mapcache, with a small per-vCPU hash short-cutting repeated
+ * mappings of the same frame.
+ *
+ * When no slot is free, the garbage bitmap is folded into the inuse bitmap;
+ * if that frees nothing, an unreferenced entry of this vCPU's hash is
+ * recycled.  Running out of both is fatal (BUG_ON).
+ */
+void *map_domain_page(unsigned long mfn)
+{
+    unsigned long flags;
+    unsigned int idx, i;
+    struct vcpu *v;
+    struct mapcache_domain *dcache;
+    struct mapcache_vcpu *vcache;
+    struct vcpu_maphash_entry *hashent;
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    v = mapcache_current_vcpu();
+    if ( !v || is_hvm_vcpu(v) )
+        return mfn_to_virt(mfn);
+
+    dcache = &v->domain->arch.pv_domain.mapcache;
+    vcache = &v->arch.pv_vcpu.mapcache;
+    if ( !dcache->l1tab )
+        return mfn_to_virt(mfn);
+
+    perfc_incr(map_domain_page_count);
+
+    local_irq_save(flags);
+
+    /* Fast path: the frame is still cached in this vCPU's hash. */
+    hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
+    if ( hashent->mfn == mfn )
+    {
+        idx = hashent->idx;
+        ASSERT(idx < dcache->entries);
+        hashent->refcnt++;
+        ASSERT(hashent->refcnt);
+        ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) == mfn);
+        goto out;
+    }
+
+    spin_lock(&dcache->lock);
+
+    /* Has some other CPU caused a wrap? We must flush if so. */
+    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
+    {
+        vcache->shadow_epoch = dcache->epoch;
+        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
+        {
+            perfc_incr(domain_page_tlb_flush);
+            flush_tlb_local();
+        }
+    }
+
+    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
+    if ( unlikely(idx >= dcache->entries) )
+    {
+        unsigned long accum = 0, prev = 0, last_mask = ~0UL;
+        unsigned int tail = dcache->entries % (8 * sizeof(unsigned long));
+
+        /*
+         * Only the low 'tail' bits of the last bitmap word describe real
+         * slots.  The bits above them are never set, so they must not be
+         * mistaken for free slots: otherwise the hash fallback below is
+         * skipped and the BUG_ON() fires on a merely full cache.
+         */
+        if ( tail )
+            last_mask = (1UL << tail) - 1;
+
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
+        {
+            /* All words but the last contribute unmasked. */
+            accum |= prev;
+            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
+            prev = ~dcache->inuse[i];
+        }
+
+        if ( accum | (prev & last_mask) )
+            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
+        else
+        {
+            /* Replace a hash entry instead. */
+            i = MAPHASH_HASHFN(mfn);
+            do {
+                hashent = &vcache->hash[i];
+                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
+                {
+                    idx = hashent->idx;
+                    ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) ==
+                           hashent->mfn);
+                    l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+                    hashent->idx = MAPHASHENT_NOTINUSE;
+                    hashent->mfn = ~0UL;
+                    break;
+                }
+                if ( ++i == MAPHASH_ENTRIES )
+                    i = 0;
+            } while ( i != MAPHASH_HASHFN(mfn) );
+        }
+        BUG_ON(idx >= dcache->entries);
+
+        /* /Second/, flush TLBs. */
+        perfc_incr(domain_page_tlb_flush);
+        flush_tlb_local();
+        vcache->shadow_epoch = ++dcache->epoch;
+        dcache->tlbflush_timestamp = tlbflush_current_time();
+    }
+
+    set_bit(idx, dcache->inuse);
+    dcache->cursor = idx + 1;
+
+    spin_unlock(&dcache->lock);
+
+    /* The slot is ours now; install the translation outside the lock. */
+    l1e_write(&DCACHE_L1ENT(dcache, idx),
+              l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+ out:
+    local_irq_restore(flags);
+    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
+}
+
+/*
+ * Release a mapping previously obtained from map_domain_page().
+ *
+ * Addresses at or above DIRECTMAP_VIRT_START need no teardown and are
+ * ignored; anything else must lie inside the mapcache range.  Depending on
+ * the state of the hash bucket the mapped frame hashes to, the mapping is
+ * either kept alive in the per-vCPU hash for later reuse, or its PTE is
+ * zapped and the slot is marked as garbage (to be reclaimed by
+ * map_domain_page() on the next wrap).
+ */
+void unmap_domain_page(const void *ptr)
+{
+ unsigned int idx;
+ struct vcpu *v;
+ struct mapcache_domain *dcache;
+ unsigned long va = (unsigned long)ptr, mfn, flags;
+ struct vcpu_maphash_entry *hashent;
+
+ if ( va >= DIRECTMAP_VIRT_START )
+ return;
+
+ ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+
+ v = mapcache_current_vcpu();
+ ASSERT(v && !is_hvm_vcpu(v));
+
+ dcache = &v->domain->arch.pv_domain.mapcache;
+ ASSERT(dcache->l1tab);
+
+ /* Recover slot index and mapped frame from the address and its PTE. */
+ idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
+ mfn = l1e_get_pfn(DCACHE_L1ENT(dcache, idx));
+ hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];
+
+ local_irq_save(flags);
+
+ if ( hashent->idx == idx )
+ {
+ /* The mapping is the hashed one: just drop a reference. */
+ ASSERT(hashent->mfn == mfn);
+ ASSERT(hashent->refcnt);
+ hashent->refcnt--;
+ }
+ else if ( !hashent->refcnt )
+ {
+ /* Bucket is idle: evict its current occupant, if any. */
+ if ( hashent->idx != MAPHASHENT_NOTINUSE )
+ {
+ /* /First/, zap the PTE. */
+ ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, hashent->idx)) ==
+ hashent->mfn);
+ l1e_write(&DCACHE_L1ENT(dcache, hashent->idx), l1e_empty());
+ /* /Second/, mark as garbage. */
+ set_bit(hashent->idx, dcache->garbage);
+ }
+
+ /* Add newly-freed mapping to the maphash. */
+ hashent->mfn = mfn;
+ hashent->idx = idx;
+ }
+ else
+ {
+ /* Bucket is referenced by another mapping: discard this one. */
+ /* /First/, zap the PTE. */
+ l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+ /* /Second/, mark as garbage. */
+ set_bit(idx, dcache->garbage);
+ }
+
+ local_irq_restore(flags);
+}
+
+/* Zero page frame mfn through a temporary map_domain_page() mapping. */
+void clear_domain_page(unsigned long mfn)
+{
+    void *va;
+
+    va = map_domain_page(mfn);
+    clear_page(va);
+    unmap_domain_page(va);
+}
+
+/*
+ * Copy the contents of page frame smfn into page frame dmfn, using a
+ * temporary map_domain_page() mapping for each of the two frames.
+ */
+void copy_domain_page(unsigned long dmfn, unsigned long smfn)
+{
+    const void *from;
+    void *to;
+
+    /* The source is mapped first and unmapped last. */
+    from = map_domain_page(smfn);
+    to = map_domain_page(dmfn);
+
+    copy_page(to, from);
+
+    unmap_domain_page(to);
+    unmap_domain_page(from);
+}
+
+/*
+ * Set up the per-domain part of the map_domain_page() cache for d.
+ *
+ * Nothing is done (and 0 is returned) for HVM and idle domains, and when,
+ * without memory hotplug, max_page does not exceed
+ * PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)).
+ *
+ * Otherwise the array of L1 table pointers and the L2 table of the mapcache
+ * slot are allocated, the L2 table is hooked into the per-domain L3, the
+ * virtual addresses of the inuse and garbage bitmaps are fixed, and the L1
+ * tables covering those bitmaps are allocated.  The bitmap pages themselves
+ * are populated later by mapcache_vcpu_init().
+ *
+ * Returns 0 on success or -ENOMEM.  Nothing is freed here on failure;
+ * mapcache_domain_exit() releases whatever was allocated.
+ */
+int mapcache_domain_init(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d));
+    unsigned long *end;
+
+    if ( is_hvm_domain(d) || is_idle_domain(d) )
+        return 0;
+
+    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return 0;
+
+    dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
+    d->arch.mm_perdomain_l2[MAPCACHE_SLOT] = alloc_xenheap_pages(0, memf);
+    if ( !dcache->l1tab || !d->arch.mm_perdomain_l2[MAPCACHE_SLOT] )
+        return -ENOMEM;
+
+    clear_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+    d->arch.mm_perdomain_l3[l3_table_offset(MAPCACHE_VIRT_START)] =
+        l3e_from_paddr(__pa(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]),
+                       __PAGE_HYPERVISOR);
+
+    /*
+     * Everything placed behind the mapcache proper (both bitmaps plus the
+     * pages left unmapped around them) has to fit into the slot.  The terms
+     * in parentheses are page counts, so they are scaled by PAGE_SIZE
+     * before being added to a byte address.
+     */
+    BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
+                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
+                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
+    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
+    /* One page is skipped ahead of inuse[] and one ahead of garbage[]. */
+    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
+    dcache->garbage = dcache->inuse +
+                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
+    end = dcache->garbage + bitmap_pages * PAGE_SIZE / sizeof(long);
+
+    /* Allocate and install the L1 tables spanning both bitmaps. */
+    for ( i = l2_table_offset((unsigned long)dcache->inuse);
+          i <= l2_table_offset((unsigned long)(end - 1)); ++i )
+    {
+        ASSERT(i <= MAPCACHE_L2_ENTRIES);
+        dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+        if ( !dcache->l1tab[i] )
+            return -ENOMEM;
+        clear_page(dcache->l1tab[i]);
+        d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+            l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+    }
+
+    spin_lock_init(&dcache->lock);
+
+    return 0;
+}
+
+/*
+ * Tear down the per-domain map_domain_page() cache of d: free the bitmap
+ * pages found in the L1 tables from the inuse bitmap's address onwards,
+ * then all L1 tables, the array pointing at them, and finally the L2 table
+ * of the mapcache slot.  HVM domains are left alone.
+ */
+void mapcache_domain_exit(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+
+    if ( is_hvm_domain(d) )
+        return;
+
+    if ( dcache->l1tab )
+    {
+        unsigned long va = (unsigned long)dcache->inuse;
+        unsigned long slot;
+
+        /*
+         * Walk page by page until running off the tracked L1 tables or
+         * reaching a table that was never allocated.
+         */
+        while ( l2_table_offset(va) <= MAPCACHE_L2_ENTRIES &&
+                dcache->l1tab[l2_table_offset(va)] )
+        {
+            l1_pgentry_t *pl1e =
+                &dcache->l1tab[l2_table_offset(va)][l1_table_offset(va)];
+
+            if ( l1e_get_flags(*pl1e) )
+                free_domheap_page(l1e_get_page(*pl1e));
+
+            va += PAGE_SIZE;
+        }
+
+        for ( slot = 0; slot <= MAPCACHE_L2_ENTRIES; ++slot )
+            free_xenheap_page(dcache->l1tab[slot]);
+
+        xfree(dcache->l1tab);
+    }
+
+    free_xenheap_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+}
+
+/*
+ * Per-vCPU part of mapcache setup.
+ *
+ * Does nothing (returning 0) for HVM vCPUs and for domains whose mapcache
+ * was not set up (l1tab == NULL).  Otherwise dcache->entries is grown in
+ * steps of MAPCACHE_VCPU_ENTRIES until it covers d->max_vcpus vCPUs,
+ * allocating the L1 page tables and the inuse/garbage bitmap pages needed
+ * for each step, and finally v's maphash is reset to "not in use".
+ *
+ * Returns 0 on success or -ENOMEM.  Pages already installed when a later
+ * allocation fails stay referenced from the page tables and are not freed
+ * here.
+ */
+int mapcache_vcpu_init(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+ unsigned long i;
+ unsigned int memf = MEMF_node(vcpu_to_node(v));
+
+ if ( is_hvm_vcpu(v) || !dcache->l1tab )
+ return 0;
+
+ while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES )
+ {
+ /* Entry count after this step; only committed once it succeeded. */
+ unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES;
+ l1_pgentry_t *pl1e;
+
+ /* Populate page tables. */
+ if ( !dcache->l1tab[i = mapcache_l2_entry(ents - 1)] )
+ {
+ dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+ if ( !dcache->l1tab[i] )
+ return -ENOMEM;
+ clear_page(dcache->l1tab[i]);
+ d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+ l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+ }
+
+ /* Populate bit maps. */
+ /* i becomes the address just past the inuse words needed for ents. */
+ i = (unsigned long)(dcache->inuse + BITS_TO_LONGS(ents));
+ pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+ if ( !l1e_get_flags(*pl1e) )
+ {
+ struct page_info *pg = alloc_domheap_page(NULL, memf);
+
+ if ( !pg )
+ return -ENOMEM;
+ clear_domain_page(page_to_mfn(pg));
+ *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+
+ /* The garbage bitmap grows in lockstep with the inuse bitmap. */
+ i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents));
+ pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+ ASSERT(!l1e_get_flags(*pl1e));
+
+ pg = alloc_domheap_page(NULL, memf);
+ if ( !pg )
+ return -ENOMEM;
+ clear_domain_page(page_to_mfn(pg));
+ *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+ }
+
+ dcache->entries = ents;
+ }
+
+ /* Mark all maphash entries as not in use. */
+ BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
+ for ( i = 0; i < MAPHASH_ENTRIES; i++ )
+ {
+ struct vcpu_maphash_entry *hashent = &v->arch.pv_vcpu.mapcache.hash[i];
+
+ hashent->mfn = ~0UL; /* never valid to map */
+ hashent->idx = MAPHASHENT_NOTINUSE;
+ }
+
+ return 0;
+}
+
+/* Number of page-sized slots in the global mapping area. */
+#define GLOBALMAP_BITS (GLOBALMAP_GBYTES << (30 - PAGE_SHIFT))
+/* One bit per slot, set while the slot is handed out. */
+static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
+/* One bit per slot, set on unmap and folded into inuse[] on the next wrap. */
+static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
+/* Slot index at which the next search for a free slot starts. */
+static unsigned int inuse_cursor;
+/* Held by map_domain_page_global() while it picks and claims a slot. */
+static DEFINE_SPINLOCK(globalmap_lock);
+
+/*
+ * Create a mapping of page frame mfn in the global mapping area and return
+ * its virtual address, or NULL on failure.  Must be called with interrupts
+ * enabled and not from IRQ context.
+ *
+ * Frames at or below PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) are served
+ * through mfn_to_virt() without consuming a slot.  When the search for a
+ * free slot runs off the end of the area, garbage slots are reclaimed and
+ * all TLBs are flushed before searching once more from the start.
+ *
+ * NULL is returned when the area is exhausted or when no L1 page table
+ * entry can be obtained for the chosen address; in the latter case the
+ * slot is handed back through the garbage map rather than leaked.
+ */
+void *map_domain_page_global(unsigned long mfn)
+{
+    l1_pgentry_t *pl1e;
+    unsigned int idx, i;
+    unsigned long va;
+
+    ASSERT(!in_irq() && local_irq_is_enabled());
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    spin_lock(&globalmap_lock);
+
+    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+    va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+    if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+    {
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
+            inuse[i] &= ~xchg(&garbage[i], 0);
+
+        /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
+        flush_tlb_all();
+
+        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+        va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+        if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+        {
+            spin_unlock(&globalmap_lock);
+            return NULL;
+        }
+    }
+
+    set_bit(idx, inuse);
+    inuse_cursor = idx + 1;
+
+    spin_unlock(&globalmap_lock);
+
+    pl1e = virt_to_xen_l1e(va);
+    if ( !pl1e )
+    {
+        /*
+         * The slot is already claimed in inuse[] and the lock has been
+         * dropped.  Flag it as garbage, exactly as an unmap would, so that
+         * the next wrap makes it available again.
+         */
+        set_bit(idx, garbage);
+        return NULL;
+    }
+    l1e_write(pl1e, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+    return (void *)va;
+}
+
+/*
+ * Undo a map_domain_page_global() mapping.  Addresses at or above
+ * DIRECTMAP_VIRT_START are ignored; any other address must lie within the
+ * global mapping area.
+ */
+void unmap_domain_page_global(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    l1_pgentry_t *entry;
+
+    if ( va < DIRECTMAP_VIRT_START )
+    {
+        ASSERT(va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END);
+
+        /* /First/, we zap the PTE. */
+        entry = virt_to_xen_l1e(va);
+        BUG_ON(!entry);
+        l1e_write(entry, l1e_empty());
+
+        /* /Second/, we add to the garbage map. */
+        set_bit(PFN_DOWN(va - GLOBALMAP_VIRT_START), garbage);
+    }
+}
+
+/*
+ * Return the MFN backing an address obtained from map_domain_page() or
+ * map_domain_page_global().
+ */
+unsigned long domain_page_map_to_mfn(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    const l1_pgentry_t *entry;
+
+    /* Addresses from DIRECTMAP_VIRT_START upwards translate directly. */
+    if ( va >= DIRECTMAP_VIRT_START )
+        return virt_to_mfn(ptr);
+
+    if ( va < GLOBALMAP_VIRT_START || va >= GLOBALMAP_VIRT_END )
+    {
+        /* Not a global mapping, so it has to be a mapcache address. */
+        ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+        entry = &__linear_l1_table[l1_linear_offset(va)];
+    }
+    else
+    {
+        entry = virt_to_xen_l1e(va);
+        BUG_ON(!entry);
+    }
+
+    return l1e_get_pfn(*entry);
+}
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e043ba10b3..9627b076c6 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -2661,9 +2661,6 @@ static inline int vcpumask_to_pcpumask(
}
}
-#define fixmap_domain_page(mfn) mfn_to_virt(mfn)
-#define fixunmap_domain_page(ptr) ((void)(ptr))
-
long do_mmuext_op(
XEN_GUEST_HANDLE_PARAM(mmuext_op_t) uops,
unsigned int count,
@@ -2983,7 +2980,6 @@ long do_mmuext_op(
case MMUEXT_CLEAR_PAGE: {
struct page_info *page;
- unsigned char *ptr;
page = get_page_from_gfn(d, op.arg1.mfn, NULL, P2M_ALLOC);
if ( !page || !get_page_type(page, PGT_writable_page) )
@@ -2998,9 +2994,7 @@ long do_mmuext_op(
/* A page is dirtied when it's being cleared. */
paging_mark_dirty(d, page_to_mfn(page));
- ptr = fixmap_domain_page(page_to_mfn(page));
- clear_page(ptr);
- fixunmap_domain_page(ptr);
+ clear_domain_page(page_to_mfn(page));
put_page_and_type(page);
break;
@@ -3008,8 +3002,6 @@ long do_mmuext_op(
case MMUEXT_COPY_PAGE:
{
- const unsigned char *src;
- unsigned char *dst;
struct page_info *src_page, *dst_page;
src_page = get_page_from_gfn(d, op.arg2.src_mfn, NULL, P2M_ALLOC);
@@ -3034,11 +3026,7 @@ long do_mmuext_op(
/* A page is dirtied when it's being copied to. */
paging_mark_dirty(d, page_to_mfn(dst_page));
- src = __map_domain_page(src_page);
- dst = fixmap_domain_page(page_to_mfn(dst_page));
- copy_page(dst, src);
- fixunmap_domain_page(dst);
- unmap_domain_page(src);
+ copy_domain_page(page_to_mfn(dst_page), page_to_mfn(src_page));
put_page_and_type(dst_page);
put_page(src_page);
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index fad918918c..3a4223952d 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -27,6 +27,7 @@
#define CONFIG_DISCONTIGMEM 1
#define CONFIG_NUMA_EMU 1
#define CONFIG_PAGEALLOC_MAX_ORDER (2 * PAGETABLE_ORDER)
+#define CONFIG_DOMAIN_PAGE 1
/* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */
#define CONFIG_X86_L1_CACHE_SHIFT 7
@@ -147,12 +148,14 @@ extern unsigned char boot_edid_info[128];
* 0xffff82c000000000 - 0xffff82c3ffffffff [16GB, 2^34 bytes, PML4:261]
* vmap()/ioremap()/fixmap area.
* 0xffff82c400000000 - 0xffff82c43fffffff [1GB, 2^30 bytes, PML4:261]
- * Compatibility machine-to-phys translation table.
+ * Global domain page map area.
* 0xffff82c440000000 - 0xffff82c47fffffff [1GB, 2^30 bytes, PML4:261]
- * High read-only compatibility machine-to-phys translation table.
+ * Compatibility machine-to-phys translation table.
* 0xffff82c480000000 - 0xffff82c4bfffffff [1GB, 2^30 bytes, PML4:261]
+ * High read-only compatibility machine-to-phys translation table.
+ * 0xffff82c4c0000000 - 0xffff82c4ffffffff [1GB, 2^30 bytes, PML4:261]
* Xen text, static data, bss.
- * 0xffff82c4c0000000 - 0xffff82dffbffffff [109GB - 64MB, PML4:261]
+ * 0xffff82c500000000 - 0xffff82dffbffffff [108GB - 64MB, PML4:261]
* Reserved for future use.
* 0xffff82dffc000000 - 0xffff82dfffffffff [64MB, 2^26 bytes, PML4:261]
* Super-page information array.
@@ -201,18 +204,24 @@ extern unsigned char boot_edid_info[128];
/* Slot 259: linear page table (shadow table). */
#define SH_LINEAR_PT_VIRT_START (PML4_ADDR(259))
#define SH_LINEAR_PT_VIRT_END (SH_LINEAR_PT_VIRT_START + PML4_ENTRY_BYTES)
-/* Slot 260: per-domain mappings. */
+/* Slot 260: per-domain mappings (including map cache). */
#define PERDOMAIN_VIRT_START (PML4_ADDR(260))
-#define PERDOMAIN_VIRT_END (PERDOMAIN_VIRT_START + (PERDOMAIN_MBYTES<<20))
-#define PERDOMAIN_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
+#define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
+#define PERDOMAIN_SLOTS 2
+#define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \
+ (PERDOMAIN_SLOT_MBYTES << 20))
/* Slot 261: machine-to-phys conversion table (256GB). */
#define RDWR_MPT_VIRT_START (PML4_ADDR(261))
#define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + MPT_VIRT_SIZE)
/* Slot 261: vmap()/ioremap()/fixmap area (16GB). */
#define VMAP_VIRT_START RDWR_MPT_VIRT_END
#define VMAP_VIRT_END (VMAP_VIRT_START + GB(16))
+/* Slot 261: global domain page map area (1GB). */
+#define GLOBALMAP_GBYTES 1
+#define GLOBALMAP_VIRT_START VMAP_VIRT_END
+#define GLOBALMAP_VIRT_END (GLOBALMAP_VIRT_START + (GLOBALMAP_GBYTES<<30))
/* Slot 261: compatibility machine-to-phys conversion table (1GB). */
-#define RDWR_COMPAT_MPT_VIRT_START VMAP_VIRT_END
+#define RDWR_COMPAT_MPT_VIRT_START GLOBALMAP_VIRT_END
#define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + GB(1))
/* Slot 261: high read-only compat machine-to-phys conversion table (1GB). */
#define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
@@ -279,9 +288,9 @@ extern unsigned long xen_phys_start;
/* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
#define GDT_LDT_VCPU_SHIFT 5
#define GDT_LDT_VCPU_VA_SHIFT (GDT_LDT_VCPU_SHIFT + PAGE_SHIFT)
-#define GDT_LDT_MBYTES PERDOMAIN_MBYTES
+#define GDT_LDT_MBYTES PERDOMAIN_SLOT_MBYTES
#define MAX_VIRT_CPUS (GDT_LDT_MBYTES << (20-GDT_LDT_VCPU_VA_SHIFT))
-#define GDT_LDT_VIRT_START PERDOMAIN_VIRT_START
+#define GDT_LDT_VIRT_START PERDOMAIN_VIRT_SLOT(0)
#define GDT_LDT_VIRT_END (GDT_LDT_VIRT_START + (GDT_LDT_MBYTES << 20))
/* The address of a particular VCPU's GDT or LDT. */
@@ -290,8 +299,16 @@ extern unsigned long xen_phys_start;
#define LDT_VIRT_START(v) \
(GDT_VIRT_START(v) + (64*1024))
+/* map_domain_page() map cache. The last per-domain-mapping sub-area. */
+#define MAPCACHE_VCPU_ENTRIES (CONFIG_PAGING_LEVELS * CONFIG_PAGING_LEVELS)
+#define MAPCACHE_ENTRIES (MAX_VIRT_CPUS * MAPCACHE_VCPU_ENTRIES)
+#define MAPCACHE_SLOT (PERDOMAIN_SLOTS - 1)
+#define MAPCACHE_VIRT_START PERDOMAIN_VIRT_SLOT(MAPCACHE_SLOT)
+#define MAPCACHE_VIRT_END (MAPCACHE_VIRT_START + \
+ MAPCACHE_ENTRIES * PAGE_SIZE)
+
#define PDPT_L1_ENTRIES \
- ((PERDOMAIN_VIRT_END - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
+ ((PERDOMAIN_VIRT_SLOT(PERDOMAIN_SLOTS - 1) - PERDOMAIN_VIRT_START) >> PAGE_SHIFT)
#define PDPT_L2_ENTRIES \
((PDPT_L1_ENTRIES + (1 << PAGETABLE_ORDER) - 1) >> PAGETABLE_ORDER)
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 1b2a770dae..03d9e5a5ca 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -39,7 +39,7 @@ struct trap_bounce {
#define MAPHASH_ENTRIES 8
#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
-#define MAPHASHENT_NOTINUSE ((u16)~0U)
+#define MAPHASHENT_NOTINUSE ((u32)~0U)
struct mapcache_vcpu {
/* Shadow of mapcache_domain.epoch. */
unsigned int shadow_epoch;
@@ -47,16 +47,15 @@ struct mapcache_vcpu {
/* Lock-free per-VCPU hash of recently-used mappings. */
struct vcpu_maphash_entry {
unsigned long mfn;
- uint16_t idx;
- uint16_t refcnt;
+ uint32_t idx;
+ uint32_t refcnt;
} hash[MAPHASH_ENTRIES];
};
-#define MAPCACHE_ORDER 10
-#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
struct mapcache_domain {
/* The PTEs that provide the mappings, and a cursor into the array. */
- l1_pgentry_t *l1tab;
+ l1_pgentry_t **l1tab;
+ unsigned int entries;
unsigned int cursor;
/* Protects map_domain_page(). */
@@ -67,12 +66,13 @@ struct mapcache_domain {
u32 tlbflush_timestamp;
/* Which mappings are in use, and which are garbage to reap next epoch? */
- unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
- unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
+ unsigned long *inuse;
+ unsigned long *garbage;
};
-void mapcache_domain_init(struct domain *);
-void mapcache_vcpu_init(struct vcpu *);
+int mapcache_domain_init(struct domain *);
+void mapcache_domain_exit(struct domain *);
+int mapcache_vcpu_init(struct vcpu *);
/* x86/64: toggle guest between kernel and user modes. */
void toggle_guest_mode(struct vcpu *);
@@ -229,6 +229,9 @@ struct pv_domain
* unmask the event channel */
bool_t auto_unmask;
+ /* map_domain_page() mapping cache. */
+ struct mapcache_domain mapcache;
+
/* Pseudophysical e820 map (XENMEM_memory_map). */
spinlock_t e820_lock;
struct e820entry *e820;
@@ -238,7 +241,7 @@ struct pv_domain
struct arch_domain
{
struct page_info **mm_perdomain_pt_pages;
- l2_pgentry_t *mm_perdomain_l2;
+ l2_pgentry_t *mm_perdomain_l2[PERDOMAIN_SLOTS];
l3_pgentry_t *mm_perdomain_l3;
unsigned int hv_compat_vstart;
@@ -324,6 +327,9 @@ struct arch_domain
struct pv_vcpu
{
+ /* map_domain_page() mapping cache. */
+ struct mapcache_vcpu mapcache;
+
struct trap_info *trap_ctxt;
unsigned long gdt_frames[FIRST_RESERVED_GDT_PAGE];
diff --git a/xen/include/xen/domain_page.h b/xen/include/xen/domain_page.h
index 9feddae84c..b7a710bc16 100644
--- a/xen/include/xen/domain_page.h
+++ b/xen/include/xen/domain_page.h
@@ -25,11 +25,16 @@ void *map_domain_page(unsigned long mfn);
*/
void unmap_domain_page(const void *va);
+/*
+ * Clear a given page frame, or copy between two of them.
+ */
+void clear_domain_page(unsigned long mfn);
+void copy_domain_page(unsigned long dmfn, unsigned long smfn);
/*
* Given a VA from map_domain_page(), return its underlying MFN.
*/
-unsigned long domain_page_map_to_mfn(void *va);
+unsigned long domain_page_map_to_mfn(const void *va);
/*
* Similar to the above calls, except the mapping is accessible in all
@@ -107,6 +112,9 @@ domain_mmap_cache_destroy(struct domain_mmap_cache *cache)
#define map_domain_page(mfn) mfn_to_virt(mfn)
#define __map_domain_page(pg) page_to_virt(pg)
#define unmap_domain_page(va) ((void)(va))
+#define clear_domain_page(mfn) clear_page(mfn_to_virt(mfn))
+#define copy_domain_page(dmfn, smfn) copy_page(mfn_to_virt(dmfn), \
+ mfn_to_virt(smfn))
#define domain_page_map_to_mfn(va) virt_to_mfn((unsigned long)(va))
#define map_domain_page_global(mfn) mfn_to_virt(mfn)