aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeorge Dunlap <george.dunlap@eu.citrix.com>2011-01-21 15:37:36 +0000
committerGeorge Dunlap <george.dunlap@eu.citrix.com>2011-01-21 15:37:36 +0000
commitcf956b11ce576b4f54dbf2ca3f11717c0960431d (patch)
tree19a4a2bc29b996724e1271124f7ab56d087491d4
parentc856ec5f96dd41e1d23528f6051ae6c29f49c716 (diff)
downloadxen-cf956b11ce576b4f54dbf2ca3f11717c0960431d.tar.gz
xen-cf956b11ce576b4f54dbf2ca3f11717c0960431d.tar.bz2
xen-cf956b11ce576b4f54dbf2ca3f11717c0960431d.zip
p2m: Allow non-leaf entries to be replaced by leaf entries
Allow l2 and l3 p2m tables to be replaced with 2MB and 1GB pages respectively, freeing the p2m table page properly. This allows, for example, a sequence of 512 singleton zero pages to be replaced with a superpage populate-on-demand entry. Changes: * Add a p2m_free_ptp() corresponding to p2m_alloc_ptp(), which will handle everything related to the freeing properly. * Add p2m_free_entry(), based on ept_free_entry(), which will free intermediate tables recursively. * For both ept and p2m, when replacing non-leaf entries with leaf entries, keep old entry and call *_free_entry() after new entry has been written and proper flushes have been done. Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
-rw-r--r--xen/arch/x86/mm/hap/hap.c8
-rw-r--r--xen/arch/x86/mm/hap/p2m-ept.c26
-rw-r--r--xen/arch/x86/mm/p2m.c61
-rw-r--r--xen/include/asm-x86/p2m.h1
4 files changed, 80 insertions, 16 deletions
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 42bc674962..8d1bc70efa 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -333,9 +333,11 @@ static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
ASSERT(page_get_owner(pg) == d);
/* Should have just the one ref we gave it in alloc_p2m_page() */
- if ( (pg->count_info & PGC_count_mask) != 1 )
- HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
- pg->count_info, pg->u.inuse.type_info);
+ if ( (pg->count_info & PGC_count_mask) != 1 ) {
+ HAP_ERROR("Odd p2m page %p count c=%#lx t=%"PRtype_info"\n",
+ pg, pg->count_info, pg->u.inuse.type_info);
+ WARN();
+ }
pg->count_info &= ~PGC_count_mask;
/* Free should not decrement domain's total allocation, since
* these pages were allocated without an owner. */
diff --git a/xen/arch/x86/mm/hap/p2m-ept.c b/xen/arch/x86/mm/hap/p2m-ept.c
index fe1d5cd168..49f379a2ef 100644
--- a/xen/arch/x86/mm/hap/p2m-ept.c
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -166,8 +166,6 @@ static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry)
/* free ept sub tree behind an entry */
void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
{
- struct domain *d = p2m->domain;
-
/* End if the entry is a leaf entry. */
if ( level == 0 || !is_epte_present(ept_entry) ||
is_epte_superpage(ept_entry) )
@@ -180,8 +178,8 @@ void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level)
ept_free_entry(p2m, epte + i, level - 1);
unmap_domain_page(epte);
}
-
- d->arch.paging.free_page(d, mfn_to_page(ept_entry->mfn));
+
+ p2m_free_ptp(p2m, mfn_to_page(ept_entry->mfn));
}
static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry,
@@ -317,6 +315,7 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
int vtd_pte_present = 0;
int needs_sync = 1;
struct domain *d = p2m->domain;
+ ept_entry_t old_entry = { .epte = 0 };
/*
* the caller must make sure:
@@ -357,8 +356,12 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
vtd_pte_present = is_epte_present(ept_entry) ? 1 : 0;
/*
- * When we are here, we must be on a leaf ept entry
- * with i == target or i > target.
+ * If we're here with i > target, we must be at a leaf node, and
+ * we need to break up the superpage.
+ *
+ * If we're here with i == target and i > 0, we need to check to see
+ * if we're replacing a non-leaf entry (i.e., pointing to an N-1 table)
+ * with a leaf entry (a 1GiB or 2MiB page), and handle things appropriately.
*/
if ( i == target )
@@ -370,6 +373,10 @@ ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
if ( !is_epte_present(ept_entry) )
needs_sync = 0;
+ /* If we're replacing a non-leaf entry with a leaf entry (1GiB or 2MiB),
+ * the intermediate tables will be freed below after the ept flush */
+ old_entry = *ept_entry;
+
if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
(p2mt == p2m_ram_paging_in_start) )
{
@@ -487,6 +494,13 @@ out:
}
}
+ /* Release the old intermediate tables, if any. This has to be the
+ last thing we do, after the ept_sync_domain() and removal
+ from the iommu tables, so as to avoid a potential
+ use-after-free. */
+ if ( is_epte_present(&old_entry) )
+ ept_free_entry(p2m, &old_entry, target);
+
return rv;
}
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index 3d54beb14d..6d9e8ac52a 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -153,11 +153,45 @@ p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type)
page_list_add_tail(pg, &p2m->pages);
pg->u.inuse.type_info = type | 1 | PGT_validated;
- pg->count_info |= 1;
return pg;
}
+void
+p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
+{
+ ASSERT(pg);
+ ASSERT(p2m);
+ ASSERT(p2m->domain);
+ ASSERT(p2m->domain->arch.paging.free_page);
+
+ page_list_del(pg, &p2m->pages);
+ p2m->domain->arch.paging.free_page(p2m->domain, pg);
+
+ return;
+}
+
+/* Free intermediate tables from a p2m sub-tree */
+void
+p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
+{
+ /* End if the entry is a leaf entry. */
+ if ( page_order == 0
+ || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
+ || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
+ return;
+
+ if ( page_order > 9 )
+ {
+ l1_pgentry_t *l3_table = map_domain_page(l1e_get_pfn(*p2m_entry));
+ for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ p2m_free_entry(p2m, l3_table + i, page_order - 9);
+ unmap_domain_page(l3_table);
+ }
+
+ p2m_free_ptp(p2m, mfn_to_page(_mfn(l1e_get_pfn(*p2m_entry))));
+}
+
// Walk one level of the P2M table, allocating a new table if required.
// Returns 0 on error.
//
@@ -1316,6 +1350,7 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
*/
if ( page_order == 18 )
{
+ l1_pgentry_t old_entry = l1e_empty();
p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
L3_PAGETABLE_SHIFT - PAGE_SHIFT,
L3_PAGETABLE_ENTRIES);
@@ -1323,10 +1358,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
!(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
{
- P2M_ERROR("configure P2M table L3 entry with large page\n");
- domain_crash(p2m->domain);
- goto out;
+ /* We're replacing a non-SP page with a superpage. Make sure to
+ * handle freeing the table properly. */
+ old_entry = *p2m_entry;
}
+
ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
l3e_content = mfn_valid(mfn)
? l3e_from_pfn(mfn_x(mfn),
@@ -1335,7 +1371,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
entry_content.l1 = l3e_content.l3;
paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
table_mfn, entry_content, 3);
+ /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+ /* Free old intermediate tables if necessary */
+ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+ p2m_free_entry(p2m, &old_entry, page_order);
}
/*
* When using PAE Xen, we only allow 33 bits of pseudo-physical
@@ -1372,9 +1412,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
/* level 1 entry */
paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
table_mfn, entry_content, 1);
+ /* NB: paging_write_p2m_entry() handles tlb flushes properly */
}
else if ( page_order == 9 )
{
+ l1_pgentry_t old_entry = l1e_empty();
p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
L2_PAGETABLE_SHIFT - PAGE_SHIFT,
L2_PAGETABLE_ENTRIES);
@@ -1384,9 +1426,9 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
!(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
{
- P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
- domain_crash(p2m->domain);
- goto out;
+ /* We're replacing a non-SP page with a superpage. Make sure to
+ * handle freeing the table properly. */
+ old_entry = *p2m_entry;
}
ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
@@ -1400,6 +1442,11 @@ p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn,
entry_content.l1 = l2e_content.l2;
paging_write_p2m_entry(p2m->domain, gfn, p2m_entry,
table_mfn, entry_content, 2);
+ /* NB: paging_write_p2m_entry() handles tlb flushes properly */
+
+ /* Free old intermediate tables if necessary */
+ if ( l1e_get_flags(old_entry) & _PAGE_PRESENT )
+ p2m_free_entry(p2m, &old_entry, page_order);
}
/* Track the highest gfn for which we have ever had a valid mapping */
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index dbd1e27d5a..2e6941e13e 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -541,6 +541,7 @@ static inline void p2m_mem_access_check(unsigned long gpa, bool_t gla_valid,
#endif
struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type);
+void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
#endif /* _XEN_P2M_H */