aboutsummaryrefslogtreecommitdiffstats
path: root/xen/arch/x86/domain_page.c
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2013-01-23 14:06:20 +0100
committerJan Beulich <jbeulich@suse.com>2013-01-23 14:06:20 +0100
commit4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328 (patch)
tree43489d3149200ddb5e03ad01309023727326ff30 /xen/arch/x86/domain_page.c
parenta8d2b06db7826063df9d04be9d6f928bf2189bd0 (diff)
downloadxen-4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328.tar.gz
xen-4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328.tar.bz2
xen-4b28bf6ae90bd83fd1113d8bdc53c3266ffeb328.zip
x86: re-introduce map_domain_page() et al
This is being done mostly in the form previously used on x86-32, utilizing the second L3 page table slot within the per-domain mapping area for those mappings. It remains to be determined whether that concept is really suitable, or whether instead re-implementing at least the non-global variant from scratch would be better. Also add the helpers {clear,copy}_domain_page() as well as initial uses of them. One question is whether, to exercise the non-trivial code paths, we shouldn't make the trivial shortcuts conditional upon NDEBUG being defined. See the debugging patch at the end of the series. Signed-off-by: Jan Beulich <jbeulich@suse.com> Acked-by: Keir Fraser <keir@xen.org>
Diffstat (limited to 'xen/arch/x86/domain_page.c')
-rw-r--r--xen/arch/x86/domain_page.c471
1 files changed, 471 insertions, 0 deletions
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
new file mode 100644
index 0000000000..6a219947d5
--- /dev/null
+++ b/xen/arch/x86/domain_page.c
@@ -0,0 +1,471 @@
+/******************************************************************************
+ * domain_page.c
+ *
+ * Allow temporary mapping of domain pages.
+ *
+ * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
+ */
+
+#include <xen/domain_page.h>
+#include <xen/mm.h>
+#include <xen/perfc.h>
+#include <xen/pfn.h>
+#include <xen/sched.h>
+#include <asm/current.h>
+#include <asm/flushtlb.h>
+#include <asm/hardirq.h>
+
+/*
+ * Pick the vCPU whose mapcache should be used on this CPU.
+ *
+ * Returns NULL when "current" has not been set up yet; callers have to
+ * check for that.
+ */
+static inline struct vcpu *mapcache_current_vcpu(void)
+{
+    struct vcpu *curr = current;
+
+    /*
+     * A not yet initialised "current" is treated like running in an idle
+     * vCPU: there is no mapcache to hand out.
+     */
+    if ( curr == (struct vcpu *)0xfffff000 )
+        return NULL;
+
+    /* Common case: the running vCPU's own mapcache is the one to use. */
+    if ( !unlikely(pagetable_is_null(curr->arch.guest_table)) ||
+         is_hvm_vcpu(curr) )
+        return curr;
+
+    /*
+     * A paravirtualised vCPU with a NULL guest_table means we are running
+     * on the idle domain's page table, so its mapcache has to be used.
+     */
+    curr = idle_vcpu[smp_processor_id()];
+
+    /* If we really are idling, perform the lazy context switch now. */
+    if ( curr == current )
+        sync_local_execstate();
+
+    /* By now the idle page table must be the active one. */
+    ASSERT(read_cr3() == __pa(idle_pg_table));
+
+    return curr;
+}
+
+/*
+ * A mapcache slot index is split in two: the bits above PAGETABLE_ORDER pick
+ * an L1 page table out of dcache->l1tab[], the low PAGETABLE_ORDER bits pick
+ * the entry within that table.
+ */
+/* Index into l1tab[] of the L1 table covering mapcache slot e. */
+#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
+/* Number of l1tab[] slots needed to cover MAPCACHE_ENTRIES mapcache slots. */
+#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
+/* The L1 page table entry (usable as an lvalue) for mapcache slot idx of dc. */
+#define DCACHE_L1ENT(dc, idx) \
+ ((dc)->l1tab[(idx) >> PAGETABLE_ORDER] \
+ [(idx) & ((1 << PAGETABLE_ORDER) - 1)])
+
+/*
+ * Map the page with machine frame number mfn and return a virtual address
+ * through which it can be accessed.
+ *
+ * Frames at or below PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) are returned via
+ * mfn_to_virt() without touching the mapcache. The same shortcut is taken
+ * when there is no usable current vCPU, when that vCPU is an HVM one, or
+ * when its domain has no mapcache (dcache->l1tab is NULL).
+ *
+ * Otherwise a slot of the domain's mapcache is used and an address at
+ * MAPCACHE_VIRT_START plus the slot offset is returned. Mappings are
+ * released with unmap_domain_page().
+ */
+void *map_domain_page(unsigned long mfn)
+{
+ unsigned long flags;
+ unsigned int idx, i;
+ struct vcpu *v;
+ struct mapcache_domain *dcache;
+ struct mapcache_vcpu *vcache;
+ struct vcpu_maphash_entry *hashent;
+
+ if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+ return mfn_to_virt(mfn);
+
+ v = mapcache_current_vcpu();
+ if ( !v || is_hvm_vcpu(v) )
+ return mfn_to_virt(mfn);
+
+ dcache = &v->domain->arch.pv_domain.mapcache;
+ vcache = &v->arch.pv_vcpu.mapcache;
+ if ( !dcache->l1tab )
+ return mfn_to_virt(mfn);
+
+ perfc_incr(map_domain_page_count);
+
+ /* Everything below, up to the "out" label, runs with interrupts off. */
+ local_irq_save(flags);
+
+ /* Fast path: the vCPU's maphash already caches a mapping of this MFN. */
+ hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
+ if ( hashent->mfn == mfn )
+ {
+ idx = hashent->idx;
+ ASSERT(idx < dcache->entries);
+ hashent->refcnt++;
+ /* Catch the reference count wrapping around to zero. */
+ ASSERT(hashent->refcnt);
+ ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) == mfn);
+ goto out;
+ }
+
+ spin_lock(&dcache->lock);
+
+ /* Has some other CPU caused a wrap? We must flush if so. */
+ if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
+ {
+ vcache->shadow_epoch = dcache->epoch;
+ if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
+ {
+ perfc_incr(domain_page_tlb_flush);
+ flush_tlb_local();
+ }
+ }
+
+ /* Look for a free slot, starting the search at the cursor. */
+ idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
+ if ( unlikely(idx >= dcache->entries) )
+ {
+ unsigned long accum = 0;
+
+ /* /First/, clean the garbage map and update the inuse list. */
+ for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
+ {
+ dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
+ /* accum ends up non-zero if any inuse word has a clear bit. */
+ accum |= ~dcache->inuse[i];
+ }
+
+ if ( accum )
+ idx = find_first_zero_bit(dcache->inuse, dcache->entries);
+ else
+ {
+ /* Replace a hash entry instead. */
+ /*
+ * Scan this vCPU's maphash, starting at mfn's bucket and wrapping
+ * around, for an entry that holds a slot but has no references.
+ */
+ i = MAPHASH_HASHFN(mfn);
+ do {
+ hashent = &vcache->hash[i];
+ if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
+ {
+ idx = hashent->idx;
+ ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, idx)) ==
+ hashent->mfn);
+ l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+ hashent->idx = MAPHASHENT_NOTINUSE;
+ hashent->mfn = ~0UL;
+ break;
+ }
+ if ( ++i == MAPHASH_ENTRIES )
+ i = 0;
+ } while ( i != MAPHASH_HASHFN(mfn) );
+ }
+ /* Neither reclaiming garbage nor stealing a hash entry found a slot. */
+ BUG_ON(idx >= dcache->entries);
+
+ /* /Second/, flush TLBs. */
+ perfc_incr(domain_page_tlb_flush);
+ flush_tlb_local();
+ vcache->shadow_epoch = ++dcache->epoch;
+ dcache->tlbflush_timestamp = tlbflush_current_time();
+ }
+
+ /* Claim the slot and let the next search start just past it. */
+ set_bit(idx, dcache->inuse);
+ dcache->cursor = idx + 1;
+
+ spin_unlock(&dcache->lock);
+
+ /* Install the new mapping in the slot's L1 entry. */
+ l1e_write(&DCACHE_L1ENT(dcache, idx),
+ l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+ out:
+ local_irq_restore(flags);
+ return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
+}
+
+/*
+ * Release a mapping obtained from map_domain_page().
+ *
+ * Addresses at or above DIRECTMAP_VIRT_START are ignored. For a mapcache
+ * address the slot is either left in place and remembered in the current
+ * vCPU's maphash, or has its PTE cleared and is flagged in the domain's
+ * garbage bitmap.
+ */
+void unmap_domain_page(const void *ptr)
+{
+ unsigned int idx;
+ struct vcpu *v;
+ struct mapcache_domain *dcache;
+ unsigned long va = (unsigned long)ptr, mfn, flags;
+ struct vcpu_maphash_entry *hashent;
+
+ if ( va >= DIRECTMAP_VIRT_START )
+ return;
+
+ ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+
+ v = mapcache_current_vcpu();
+ ASSERT(v && !is_hvm_vcpu(v));
+
+ dcache = &v->domain->arch.pv_domain.mapcache;
+ ASSERT(dcache->l1tab);
+
+ /* Recover the slot index from the address, and the MFN from its PTE. */
+ idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
+ mfn = l1e_get_pfn(DCACHE_L1ENT(dcache, idx));
+ hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];
+
+ local_irq_save(flags);
+
+ if ( hashent->idx == idx )
+ {
+ /* The hash bucket already tracks this slot: just drop a reference. */
+ ASSERT(hashent->mfn == mfn);
+ ASSERT(hashent->refcnt);
+ hashent->refcnt--;
+ }
+ else if ( !hashent->refcnt )
+ {
+ /* The bucket is unreferenced: evict what it holds, then cache ours. */
+ if ( hashent->idx != MAPHASHENT_NOTINUSE )
+ {
+ /* /First/, zap the PTE. */
+ ASSERT(l1e_get_pfn(DCACHE_L1ENT(dcache, hashent->idx)) ==
+ hashent->mfn);
+ l1e_write(&DCACHE_L1ENT(dcache, hashent->idx), l1e_empty());
+ /* /Second/, mark as garbage. */
+ set_bit(hashent->idx, dcache->garbage);
+ }
+
+ /* Add newly-freed mapping to the maphash. */
+ hashent->mfn = mfn;
+ hashent->idx = idx;
+ }
+ else
+ {
+ /* The bucket is referenced for a different slot: tear ours down now. */
+ /* /First/, zap the PTE. */
+ l1e_write(&DCACHE_L1ENT(dcache, idx), l1e_empty());
+ /* /Second/, mark as garbage. */
+ set_bit(idx, dcache->garbage);
+ }
+
+ local_irq_restore(flags);
+}
+
+/* Zero the page with machine frame number mfn through a temporary mapping. */
+void clear_domain_page(unsigned long mfn)
+{
+    void *page = map_domain_page(mfn);
+
+    clear_page(page);
+
+    unmap_domain_page(page);
+}
+
+/*
+ * Copy the contents of the page with machine frame number smfn into the
+ * page with machine frame number dmfn, using temporary mappings of both.
+ */
+void copy_domain_page(unsigned long dmfn, unsigned long smfn)
+{
+    const void *from;
+    void *to;
+
+    /* Source is mapped first and unmapped last. */
+    from = map_domain_page(smfn);
+    to = map_domain_page(dmfn);
+
+    copy_page(to, from);
+
+    unmap_domain_page(to);
+    unmap_domain_page(from);
+}
+
+/*
+ * Set up the per-domain part of the mapcache for d.
+ *
+ * Nothing is done (and 0 is returned) for HVM and idle domains, nor when
+ * memory hotplug is off and max_page does not exceed
+ * PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)), i.e. when map_domain_page() would
+ * always take its mfn_to_virt() shortcut.
+ *
+ * Otherwise this allocates the l1tab[] array and the MAPCACHE_SLOT L2 table,
+ * hooks the latter into the per-domain L3 table, fixes the virtual addresses
+ * of the inuse and garbage bitmaps, and allocates the L1 tables covering the
+ * bitmap address range. The bitmap pages themselves are not allocated here.
+ *
+ * Returns 0 on success or -ENOMEM. On failure whatever was allocated so far
+ * is left in place. NOTE(review): presumably the caller then invokes
+ * mapcache_domain_exit() to clean up - confirm.
+ */
+int mapcache_domain_init(struct domain *d)
+{
+    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+    unsigned int i, bitmap_pages, memf = MEMF_node(domain_to_node(d));
+    unsigned long *end;
+
+    if ( is_hvm_domain(d) || is_idle_domain(d) )
+        return 0;
+
+    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return 0;
+
+    dcache->l1tab = xzalloc_array(l1_pgentry_t *, MAPCACHE_L2_ENTRIES + 1);
+    d->arch.mm_perdomain_l2[MAPCACHE_SLOT] = alloc_xenheap_pages(0, memf);
+    if ( !dcache->l1tab || !d->arch.mm_perdomain_l2[MAPCACHE_SLOT] )
+        return -ENOMEM;
+
+    clear_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+    d->arch.mm_perdomain_l3[l3_table_offset(MAPCACHE_VIRT_START)] =
+        l3e_from_paddr(__pa(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]),
+                       __PAGE_HYPERVISOR);
+
+    /*
+     * Layout above MAPCACHE_VIRT_END, in pages: one gap page, the inuse
+     * bitmap, one gap page, the garbage bitmap (see the assignments below).
+     * The check allows for one further page after that. The page count has
+     * to be scaled by PAGE_SIZE before being added to the (byte) address,
+     * or else the check would not cover the bitmaps at all.
+     */
+    BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
+                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
+                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
+    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
+    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
+    dcache->garbage = dcache->inuse +
+                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);
+    end = dcache->garbage + bitmap_pages * PAGE_SIZE / sizeof(long);
+
+    /* Allocate and hook up the L1 tables spanning both bitmaps. */
+    for ( i = l2_table_offset((unsigned long)dcache->inuse);
+          i <= l2_table_offset((unsigned long)(end - 1)); ++i )
+    {
+        /* l1tab[] has MAPCACHE_L2_ENTRIES + 1 elements. */
+        ASSERT(i <= MAPCACHE_L2_ENTRIES);
+        dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+        if ( !dcache->l1tab[i] )
+            return -ENOMEM;
+        clear_page(dcache->l1tab[i]);
+        d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+            l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+    }
+
+    spin_lock_init(&dcache->lock);
+
+    return 0;
+}
+
+/*
+ * Tear down the mapcache state of domain d; does nothing for HVM domains.
+ *
+ * If l1tab[] was allocated, this frees the domheap pages found mapped from
+ * the address of the inuse bitmap onwards, then every L1 table recorded in
+ * l1tab[], then l1tab[] itself. The MAPCACHE_SLOT L2 table is freed whether
+ * or not l1tab[] exists.
+ */
+void mapcache_domain_exit(struct domain *d)
+{
+ struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+
+ if ( is_hvm_domain(d) )
+ return;
+
+ if ( dcache->l1tab )
+ {
+ unsigned long i;
+
+ /*
+ * Walk page by page from the start of the inuse bitmap. The walk ends
+ * once the address leaves the range covered by l1tab[] or reaches an
+ * L2 slot with no L1 table; pages without a populated PTE are skipped.
+ */
+ for ( i = (unsigned long)dcache->inuse; ; i += PAGE_SIZE )
+ {
+ l1_pgentry_t *pl1e;
+
+ if ( l2_table_offset(i) > MAPCACHE_L2_ENTRIES ||
+ !dcache->l1tab[l2_table_offset(i)] )
+ break;
+
+ pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+ if ( l1e_get_flags(*pl1e) )
+ free_domheap_page(l1e_get_page(*pl1e));
+ }
+
+ /*
+ * NOTE(review): unused l1tab[] slots are NULL (the array is allocated
+ * zeroed), so this relies on free_xenheap_page() accepting NULL - confirm.
+ */
+ for ( i = 0; i < MAPCACHE_L2_ENTRIES + 1; ++i )
+ free_xenheap_page(dcache->l1tab[i]);
+
+ xfree(dcache->l1tab);
+ }
+ free_xenheap_page(d->arch.mm_perdomain_l2[MAPCACHE_SLOT]);
+}
+
+/*
+ * Per-vCPU mapcache initialisation.
+ *
+ * Does nothing (returning 0) for HVM vCPUs or when the domain has no
+ * mapcache (dcache->l1tab is NULL). Otherwise dcache->entries is grown in
+ * steps of MAPCACHE_VCPU_ENTRIES until it reaches
+ * d->max_vcpus * MAPCACHE_VCPU_ENTRIES, allocating L1 page tables and bitmap
+ * pages as needed, after which every entry of v's maphash is marked unused.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails, in which case
+ * dcache->entries only accounts for the steps that completed.
+ */
+int mapcache_vcpu_init(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
+ unsigned long i;
+ unsigned int memf = MEMF_node(vcpu_to_node(v));
+
+ if ( is_hvm_vcpu(v) || !dcache->l1tab )
+ return 0;
+
+ while ( dcache->entries < d->max_vcpus * MAPCACHE_VCPU_ENTRIES )
+ {
+ /* Slot count the mapcache will have once this step has completed. */
+ unsigned int ents = dcache->entries + MAPCACHE_VCPU_ENTRIES;
+ l1_pgentry_t *pl1e;
+
+ /* Populate page tables. */
+ /* Only the L1 table covering the last new slot (ents - 1) is checked. */
+ if ( !dcache->l1tab[i = mapcache_l2_entry(ents - 1)] )
+ {
+ dcache->l1tab[i] = alloc_xenheap_pages(0, memf);
+ if ( !dcache->l1tab[i] )
+ return -ENOMEM;
+ clear_page(dcache->l1tab[i]);
+ d->arch.mm_perdomain_l2[MAPCACHE_SLOT][i] =
+ l2e_from_paddr(__pa(dcache->l1tab[i]), __PAGE_HYPERVISOR);
+ }
+
+ /* Populate bit maps. */
+ /*
+ * Both bitmaps grow in lock-step: when the page holding the inuse word
+ * just past ents bits is not mapped yet, a zeroed page is allocated
+ * for it and another one for the matching place in the garbage bitmap.
+ */
+ i = (unsigned long)(dcache->inuse + BITS_TO_LONGS(ents));
+ pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+ if ( !l1e_get_flags(*pl1e) )
+ {
+ struct page_info *pg = alloc_domheap_page(NULL, memf);
+
+ if ( !pg )
+ return -ENOMEM;
+ clear_domain_page(page_to_mfn(pg));
+ *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+
+ i = (unsigned long)(dcache->garbage + BITS_TO_LONGS(ents));
+ pl1e = &dcache->l1tab[l2_table_offset(i)][l1_table_offset(i)];
+ ASSERT(!l1e_get_flags(*pl1e));
+
+ pg = alloc_domheap_page(NULL, memf);
+ if ( !pg )
+ return -ENOMEM;
+ clear_domain_page(page_to_mfn(pg));
+ *pl1e = l1e_from_page(pg, __PAGE_HYPERVISOR);
+ }
+
+ /* Publish the new size only after everything backing it exists. */
+ dcache->entries = ents;
+ }
+
+ /* Mark all maphash entries as not in use. */
+ /* The "not in use" marker must not collide with a valid slot index. */
+ BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
+ for ( i = 0; i < MAPHASH_ENTRIES; i++ )
+ {
+ struct vcpu_maphash_entry *hashent = &v->arch.pv_vcpu.mapcache.hash[i];
+
+ hashent->mfn = ~0UL; /* never valid to map */
+ hashent->idx = MAPHASHENT_NOTINUSE;
+ }
+
+ return 0;
+}
+
+/* Number of page-sized slots in GLOBALMAP_GBYTES GiB of global map space. */
+#define GLOBALMAP_BITS (GLOBALMAP_GBYTES << (30 - PAGE_SHIFT))
+/* One bit per global map slot: set while the slot is allocated. */
+static unsigned long inuse[BITS_TO_LONGS(GLOBALMAP_BITS)];
+/* One bit per global map slot: set once unmapped, until reclaimed from inuse. */
+static unsigned long garbage[BITS_TO_LONGS(GLOBALMAP_BITS)];
+/* Slot index at which the next search for a free slot starts. */
+static unsigned int inuse_cursor;
+/* Held while searching and updating inuse[] and inuse_cursor. */
+static DEFINE_SPINLOCK(globalmap_lock);
+
+/*
+ * Map the page with machine frame number mfn into the global mapping area
+ * (GLOBALMAP_VIRT_START up to GLOBALMAP_VIRT_END).
+ *
+ * Must be called with interrupts enabled and not from IRQ context (both are
+ * asserted). Frames at or below PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) are
+ * returned via mfn_to_virt() and consume no slot.
+ *
+ * Returns the virtual address of the mapping, or NULL if no slot is free
+ * even after reclaiming garbage, or if no L1 page table entry could be
+ * obtained for the chosen slot. Release with unmap_domain_page_global().
+ */
+void *map_domain_page_global(unsigned long mfn)
+{
+    l1_pgentry_t *pl1e;
+    unsigned int idx, i;
+    unsigned long va;
+
+    ASSERT(!in_irq() && local_irq_is_enabled());
+
+    if ( mfn <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
+        return mfn_to_virt(mfn);
+
+    spin_lock(&globalmap_lock);
+
+    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
+    va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+    if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+    {
+        /* /First/, clean the garbage map and update the inuse list. */
+        for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
+            inuse[i] &= ~xchg(&garbage[i], 0);
+
+        /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
+        flush_tlb_all();
+
+        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
+        va = GLOBALMAP_VIRT_START + pfn_to_paddr(idx);
+        if ( unlikely(va >= GLOBALMAP_VIRT_END) )
+        {
+            spin_unlock(&globalmap_lock);
+            return NULL;
+        }
+    }
+
+    set_bit(idx, inuse);
+    inuse_cursor = idx + 1;
+
+    spin_unlock(&globalmap_lock);
+
+    pl1e = virt_to_xen_l1e(va);
+    if ( !pl1e )
+    {
+        /*
+         * The slot was already claimed above. Hand it to the garbage map,
+         * just like unmap_domain_page_global() does, so that the next
+         * reclaim pass returns it to the pool instead of leaking it.
+         */
+        set_bit(idx, garbage);
+        return NULL;
+    }
+    l1e_write(pl1e, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+
+    return (void *)va;
+}
+
+/*
+ * Release a mapping obtained from map_domain_page_global().
+ *
+ * Addresses at or above DIRECTMAP_VIRT_START are ignored. For a global map
+ * address the PTE is cleared and the slot is flagged in the garbage bitmap.
+ */
+void unmap_domain_page_global(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+    unsigned long slot;
+    l1_pgentry_t *l1e;
+
+    if ( va >= DIRECTMAP_VIRT_START )
+        return;
+
+    ASSERT(va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END);
+
+    /* Step one: clear the page table entry behind the mapping. */
+    l1e = virt_to_xen_l1e(va);
+    BUG_ON(!l1e);
+    l1e_write(l1e, l1e_empty());
+
+    /* Step two: only now record the slot in the garbage map. */
+    slot = PFN_DOWN(va - GLOBALMAP_VIRT_START);
+    set_bit(slot, garbage);
+}
+
+/*
+ * Return the MFN behind an address handed out by map_domain_page() or
+ * map_domain_page_global().
+ */
+unsigned long domain_page_map_to_mfn(const void *ptr)
+{
+    unsigned long va = (unsigned long)ptr;
+
+    /* Direct map addresses translate without a page table lookup. */
+    if ( va >= DIRECTMAP_VIRT_START )
+        return virt_to_mfn(ptr);
+
+    /* Global mappings: consult Xen's own L1 entry for the address. */
+    if ( va >= GLOBALMAP_VIRT_START && va < GLOBALMAP_VIRT_END )
+    {
+        const l1_pgentry_t *xen_l1e = virt_to_xen_l1e(va);
+
+        BUG_ON(!xen_l1e);
+        return l1e_get_pfn(*xen_l1e);
+    }
+
+    /* Anything else has to be a mapcache address: use the linear L1 table. */
+    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
+    return l1e_get_pfn(__linear_l1_table[l1_linear_offset(va)]);
+}