author    Keir Fraser <keir.fraser@citrix.com>  2009-12-17 06:27:56 +0000
committer Keir Fraser <keir.fraser@citrix.com>  2009-12-17 06:27:56 +0000
commit    3e3ccc354377859506ad948df84572813204c99c (patch)
tree      afd84dd673b7dd062f83a15cdab332a84b6555a6 /xen/arch
parent    af909e7e16dd67452bde91bb71c8111c95c43983 (diff)
The internal Xen x86 emulator is fixed to handle shared/sharable pages correctly.
If pages cannot be unshared immediately (due to a lack of the free memory required to create private copies), the VCPU under emulation is paused and the emulator returns X86EMUL_RETRY; the access is retried once some memory is freed back to Xen (possibly through host paging).

Signed-off-by: Grzegorz Milos <Grzegorz.Milos@citrix.com>
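For orientation, the pattern applied at each emulator access point looks roughly like the sketch below. The wrapper name emul_access_gfn is hypothetical; the p2m helpers and return codes are the ones used throughout the diff:

    static int emul_access_gfn(struct domain *d, unsigned long gfn)
    {
        p2m_type_t p2mt;
        mfn_t mfn;

        /* Translate the gfn, unsharing it if it is currently shared. */
        mfn = gfn_to_mfn_unshare(d, gfn, &p2mt, 0);

        /* Paged out: ask the pager to repopulate the page, then retry. */
        if ( p2m_is_paging(p2mt) )
        {
            p2m_mem_paging_populate(d, gfn);
            return X86EMUL_RETRY;
        }

        /* Still shared: unsharing was deferred because no free memory was
         * available for a private copy; the paused VCPU retries later. */
        if ( p2m_is_shared(p2mt) )
            return X86EMUL_RETRY;

        /* ... perform the access through mfn ... */
        return X86EMUL_OKAY;
    }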
Diffstat (limited to 'xen/arch')
-rw-r--r--  xen/arch/x86/hvm/emulate.c        | 14
-rw-r--r--  xen/arch/x86/hvm/hvm.c            | 23
-rw-r--r--  xen/arch/x86/hvm/intercept.c      | 19
-rw-r--r--  xen/arch/x86/hvm/io.c             | 16
-rw-r--r--  xen/arch/x86/mm/guest_walk.c      | 92
-rw-r--r--  xen/arch/x86/mm/hap/guest_walk.c  | 18
6 files changed, 121 insertions(+), 61 deletions(-)
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index a636c9bef6..88b1eb1946 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -62,12 +62,14 @@ static int hvmemul_do_io(
int rc;
/* Check for paged out page */
- ram_mfn = gfn_to_mfn(current->domain, ram_gfn, &p2mt);
+ ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
p2m_mem_paging_populate(curr->domain, ram_gfn);
return X86EMUL_RETRY;
}
+ if ( p2m_is_shared(p2mt) )
+ return X86EMUL_RETRY;
/*
* Weird-sized accesses have undefined behaviour: we discard writes
@@ -282,7 +284,7 @@ static int hvmemul_linear_to_phys(
}
else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == INVALID_GFN )
{
- if ( pfec == PFEC_page_paged )
+ if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
return X86EMUL_RETRY;
hvm_inject_exception(TRAP_page_fault, pfec, addr);
return X86EMUL_EXCEPTION;
@@ -299,7 +301,7 @@ static int hvmemul_linear_to_phys(
/* Is it contiguous with the preceding PFNs? If not then we're done. */
if ( (npfn == INVALID_GFN) || (npfn != (pfn + (reverse ? -i : i))) )
{
- if ( pfec == PFEC_page_paged )
+ if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
return X86EMUL_RETRY;
done /= bytes_per_rep;
if ( done == 0 )
@@ -441,6 +443,8 @@ static int __hvmemul_read(
return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data);
case HVMCOPY_gfn_paged_out:
return X86EMUL_RETRY;
+ case HVMCOPY_gfn_shared:
+ return X86EMUL_RETRY;
default:
break;
}
@@ -533,6 +537,8 @@ static int hvmemul_write(
IOREQ_WRITE, 0, p_data);
case HVMCOPY_gfn_paged_out:
return X86EMUL_RETRY;
+ case HVMCOPY_gfn_shared:
+ return X86EMUL_RETRY;
default:
break;
}
@@ -708,6 +714,8 @@ static int hvmemul_rep_movs(
if ( rc == HVMCOPY_gfn_paged_out )
return X86EMUL_RETRY;
+ if ( rc == HVMCOPY_gfn_shared )
+ return X86EMUL_RETRY;
if ( rc != HVMCOPY_okay )
{
gdprintk(XENLOG_WARNING, "Failed memory-to-memory REP MOVS: sgpa=%"
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4fc1b93488..e18e94f349 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -311,7 +311,7 @@ static int hvm_set_ioreq_page(
unsigned long mfn;
void *va;
- mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+ mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn, &p2mt, 0));
if ( !p2m_is_ram(p2mt) )
return -EINVAL;
if ( p2m_is_paging(p2mt) )
@@ -319,6 +319,8 @@ static int hvm_set_ioreq_page(
p2m_mem_paging_populate(d, gmfn);
return -ENOENT;
}
+ if ( p2m_is_shared(p2mt) )
+ return -ENOENT;
ASSERT(mfn_valid(mfn));
page = mfn_to_page(mfn);
@@ -1323,7 +1325,7 @@ static void *hvm_map_entry(unsigned long va)
* we still treat it as a kernel-mode read (i.e. no access checks). */
pfec = PFEC_page_present;
gfn = paging_gva_to_gfn(current, va, &pfec);
- if ( pfec == PFEC_page_paged )
+ if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared )
return NULL;
mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
if ( p2m_is_paging(p2mt) )
@@ -1557,6 +1559,8 @@ void hvm_task_switch(
goto out;
if ( rc == HVMCOPY_gfn_paged_out )
goto out;
+ if ( rc == HVMCOPY_gfn_shared )
+ goto out;
eflags = regs->eflags;
if ( taskswitch_reason == TSW_iret )
@@ -1595,6 +1599,8 @@ void hvm_task_switch(
goto out;
if ( rc == HVMCOPY_gfn_paged_out )
goto out;
+ if ( rc == HVMCOPY_gfn_shared )
+ goto out;
rc = hvm_copy_from_guest_virt(
&tss, tr.base, sizeof(tss), PFEC_page_present);
@@ -1602,6 +1608,11 @@ void hvm_task_switch(
goto out;
if ( rc == HVMCOPY_gfn_paged_out )
goto out;
+ /* Note: this could be optimised, if the callee functions knew we want RO
+ * access */
+ if ( rc == HVMCOPY_gfn_shared )
+ goto out;
+
if ( hvm_set_cr3(tss.cr3) )
goto out;
@@ -1639,6 +1650,8 @@ void hvm_task_switch(
exn_raised = 1;
if ( rc == HVMCOPY_gfn_paged_out )
goto out;
+ if ( rc == HVMCOPY_gfn_shared )
+ goto out;
if ( (tss.trace & 1) && !exn_raised )
hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
@@ -1700,6 +1713,8 @@ static enum hvm_copy_result __hvm_copy(
{
if ( pfec == PFEC_page_paged )
return HVMCOPY_gfn_paged_out;
+ if ( pfec == PFEC_page_shared )
+ return HVMCOPY_gfn_shared;
if ( flags & HVMCOPY_fault )
hvm_inject_exception(TRAP_page_fault, pfec, addr);
return HVMCOPY_bad_gva_to_gfn;
@@ -1710,13 +1725,15 @@ static enum hvm_copy_result __hvm_copy(
gfn = addr >> PAGE_SHIFT;
}
- mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+ mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
if ( p2m_is_paging(p2mt) )
{
p2m_mem_paging_populate(curr->domain, gfn);
return HVMCOPY_gfn_paged_out;
}
+ if ( p2m_is_shared(p2mt) )
+ return HVMCOPY_gfn_shared;
if ( p2m_is_grant(p2mt) )
return HVMCOPY_unhandleable;
if ( !p2m_is_ram(p2mt) )
diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c
index 841dec8921..4af9e3d112 100644
--- a/xen/arch/x86/hvm/intercept.c
+++ b/xen/arch/x86/hvm/intercept.c
@@ -72,12 +72,17 @@ static int hvm_mmio_access(struct vcpu *v,
{
for ( i = 0; i < p->count; i++ )
{
+ int ret;
+
rc = read_handler(v, p->addr + (sign * i * p->size), p->size,
&data);
if ( rc != X86EMUL_OKAY )
break;
- if ( hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
- p->size) == HVMCOPY_gfn_paged_out )
+ ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size),
+ &data,
+ p->size);
+ if ( (ret == HVMCOPY_gfn_paged_out) ||
+ (ret == HVMCOPY_gfn_shared) )
{
rc = X86EMUL_RETRY;
break;
@@ -88,9 +93,13 @@ static int hvm_mmio_access(struct vcpu *v,
{
for ( i = 0; i < p->count; i++ )
{
- if ( hvm_copy_from_guest_phys(&data,
- p->data + (sign * i * p->size),
- p->size) == HVMCOPY_gfn_paged_out )
+ int ret;
+
+ ret = hvm_copy_from_guest_phys(&data,
+ p->data + (sign * i * p->size),
+ p->size);
+ if ( (ret == HVMCOPY_gfn_paged_out) ||
+ (ret == HVMCOPY_gfn_shared) )
{
rc = X86EMUL_RETRY;
break;
diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 29079f296d..3c31a6d2e4 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -263,8 +263,11 @@ static int dpci_ioport_read(uint32_t mport, ioreq_t *p)
if ( p->data_is_ptr )
{
- if ( hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
- p->size) == HVMCOPY_gfn_paged_out )
+ int ret;
+ ret = hvm_copy_to_guest_phys(p->data + (sign * i * p->size), &data,
+ p->size);
+ if ( (ret == HVMCOPY_gfn_paged_out) ||
+ (ret == HVMCOPY_gfn_shared) )
return X86EMUL_RETRY;
}
else
@@ -284,8 +287,13 @@ static int dpci_ioport_write(uint32_t mport, ioreq_t *p)
data = p->data;
if ( p->data_is_ptr )
{
- if ( hvm_copy_from_guest_phys(&data, p->data + (sign * i * p->size),
- p->size) == HVMCOPY_gfn_paged_out )
+ int ret;
+
+ ret = hvm_copy_from_guest_phys(&data,
+ p->data + (sign * i * p->size),
+ p->size);
+ if ( (ret == HVMCOPY_gfn_paged_out) ||
+ (ret == HVMCOPY_gfn_shared) )
return X86EMUL_RETRY;
}
diff --git a/xen/arch/x86/mm/guest_walk.c b/xen/arch/x86/mm/guest_walk.c
index b0f42f328a..5a4f491e2b 100644
--- a/xen/arch/x86/mm/guest_walk.c
+++ b/xen/arch/x86/mm/guest_walk.c
@@ -86,6 +86,36 @@ static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
return 0;
}
+static inline void *map_domain_gfn(struct domain *d,
+ gfn_t gfn,
+ mfn_t *mfn,
+ p2m_type_t *p2mt,
+ uint32_t *rc)
+{
+ /* Translate the gfn, unsharing if shared */
+ *mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0);
+ if ( p2m_is_paging(*p2mt) )
+ {
+ p2m_mem_paging_populate(d, gfn_x(gfn));
+
+ *rc = _PAGE_PAGED;
+ return NULL;
+ }
+ if ( p2m_is_shared(*p2mt) )
+ {
+ *rc = _PAGE_SHARED;
+ return NULL;
+ }
+ if ( !p2m_is_ram(*p2mt) )
+ {
+ *rc |= _PAGE_PRESENT;
+ return NULL;
+ }
+ ASSERT(mfn_valid(mfn_x(*mfn)));
+
+ return map_domain_page(mfn_x(*mfn));
+}
+
/* Walk the guest pagetables, after the manner of a hardware walker. */
uint32_t
@@ -124,23 +154,14 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
if ( rc & _PAGE_PRESENT ) goto out;
/* Map the l3 table */
- gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(d, gfn_x(guest_l4e_get_gfn(gw->l4e)));
-
- rc = _PAGE_PAGED;
- goto out;
- }
- if ( !p2m_is_ram(p2mt) )
- {
- rc |= _PAGE_PRESENT;
+ l3p = map_domain_gfn(d,
+ guest_l4e_get_gfn(gw->l4e),
+ &gw->l3mfn,
+ &p2mt,
+ &rc);
+ if ( l3p == NULL )
goto out;
- }
- ASSERT(mfn_valid(mfn_x(gw->l3mfn)));
-
/* Get the l3e and check its flags */
- l3p = map_domain_page(mfn_x(gw->l3mfn));
gw->l3e = l3p[guest_l3_table_offset(va)];
gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
rc |= ((gflags & mflags) ^ mflags);
@@ -160,23 +181,14 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
#endif /* PAE or 64... */
/* Map the l2 table */
- gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(d, gfn_x(guest_l3e_get_gfn(gw->l3e)));
-
- rc = _PAGE_PAGED;
- goto out;
- }
- if ( !p2m_is_ram(p2mt) )
- {
- rc |= _PAGE_PRESENT;
+ l2p = map_domain_gfn(d,
+ guest_l3e_get_gfn(gw->l3e),
+ &gw->l2mfn,
+ &p2mt,
+ &rc);
+ if ( l2p == NULL )
goto out;
- }
- ASSERT(mfn_valid(mfn_x(gw->l2mfn)));
-
/* Get the l2e */
- l2p = map_domain_page(mfn_x(gw->l2mfn));
gw->l2e = l2p[guest_l2_table_offset(va)];
#else /* 32-bit only... */
@@ -225,21 +237,13 @@ guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
else
{
/* Not a superpage: carry on and find the l1e. */
- gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(d, gfn_x(guest_l2e_get_gfn(gw->l2e)));
-
- rc = _PAGE_PAGED;
+ l1p = map_domain_gfn(d,
+ guest_l2e_get_gfn(gw->l2e),
+ &gw->l1mfn,
+ &p2mt,
+ &rc);
+ if ( l1p == NULL )
goto out;
- }
- if ( !p2m_is_ram(p2mt) )
- {
- rc |= _PAGE_PRESENT;
- goto out;
- }
- ASSERT(mfn_valid(mfn_x(gw->l1mfn)));
- l1p = map_domain_page(mfn_x(gw->l1mfn));
gw->l1e = l1p[guest_l1_table_offset(va)];
gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
rc |= ((gflags & mflags) ^ mflags);
diff --git a/xen/arch/x86/mm/hap/guest_walk.c b/xen/arch/x86/mm/hap/guest_walk.c
index be8a85e071..ac9b975f02 100644
--- a/xen/arch/x86/mm/hap/guest_walk.c
+++ b/xen/arch/x86/mm/hap/guest_walk.c
@@ -32,6 +32,7 @@
#if GUEST_PAGING_LEVELS <= CONFIG_PAGING_LEVELS
#include <asm/guest_pt.h>
+#include <asm/p2m.h>
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
struct vcpu *v, unsigned long gva, uint32_t *pfec)
@@ -45,7 +46,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
/* Get the top-level table's MFN */
cr3 = v->arch.hvm_vcpu.guest_cr[3];
- top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt);
+ top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
// if ( p2m_is_paged(p2mt) )
@@ -54,6 +55,11 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
}
+ if ( p2m_is_shared(p2mt) )
+ {
+ pfec[0] = PFEC_page_shared;
+ return INVALID_GFN;
+ }
if ( !p2m_is_ram(p2mt) )
{
pfec[0] &= ~PFEC_page_present;
@@ -73,7 +79,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
if ( missing == 0 )
{
gfn_t gfn = guest_l1e_get_gfn(gw.l1e);
- gfn_to_mfn(v->domain, gfn, &p2mt);
+ gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
// if ( p2m_is_paged(p2mt) )
@@ -82,6 +88,11 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
}
+ if ( p2m_is_shared(p2mt) )
+ {
+ pfec[0] = PFEC_page_shared;
+ return INVALID_GFN;
+ }
return gfn_x(gfn);
}
@@ -92,6 +103,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
if ( missing & _PAGE_PAGED )
pfec[0] = PFEC_page_paged;
+ if ( missing & _PAGE_SHARED )
+ pfec[0] = PFEC_page_shared;
+
return INVALID_GFN;
}
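
Caller-side, the effect of the patch is that every hvm_copy_* consumer must treat HVMCOPY_gfn_shared exactly like HVMCOPY_gfn_paged_out. A minimal sketch under that assumption (copy_or_retry is a hypothetical helper; hvm_copy_from_guest_phys and the result codes are the real ones touched above):

    static int copy_or_retry(void *buf, paddr_t gpa, int size)
    {
        switch ( hvm_copy_from_guest_phys(buf, gpa, size) )
        {
        case HVMCOPY_okay:
            return X86EMUL_OKAY;
        case HVMCOPY_gfn_paged_out: /* pager is repopulating the page */
        case HVMCOPY_gfn_shared:    /* unshare deferred until memory is freed */
            return X86EMUL_RETRY;   /* pause the VCPU, retry the instruction */
        default:
            return X86EMUL_UNHANDLEABLE;
        }
    }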