/******************************************************************************
* arch/x86/shadow_64.c
*
* Copyright (c) 2005 Michael A Fetterman
* Based on an earlier implementation by Ian Pratt et al
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Jun Nakajima <jun.nakajima@intel.com>
* Chengyuan Li <chengyuan.li@intel.com>
*
* Extended to support 64-bit guests.
*/
#include <xen/config.h>
#include <xen/types.h>
#include <xen/mm.h>
#include <xen/domain_page.h>
#include <asm/shadow.h>
#include <asm/page.h>
#include <xen/event.h>
#include <xen/sched.h>
#include <xen/trace.h>
extern void free_shadow_pages(struct domain *d);
#if SHADOW_DEBUG
static void mark_shadows_as_reflecting_snapshot(struct domain *d, unsigned long gpfn);
#endif
#if CONFIG_PAGING_LEVELS == 3
#include <asm/shadow_64.h>
static unsigned long shadow_l3_table(
struct domain *d, unsigned long gpfn, unsigned long gmfn);
#endif
#if CONFIG_PAGING_LEVELS == 4
#include <asm/shadow_64.h>
static unsigned long shadow_l4_table(
struct domain *d, unsigned long gpfn, unsigned long gmfn);
static void shadow_map_into_current(struct vcpu *v,
unsigned long va, unsigned int from, unsigned int to);
#endif
/********
There's a per-domain shadow table spin lock which works fine for SMP
hosts. We don't have to worry about interrupts as no shadow operations
happen in an interrupt context. It's probably not quite ready for SMP
guest operation as we have to worry about synchronisation between gpte
and spte updates. It's possible that this might only happen in a
hypercall context, in which case we'll probably have a per-domain
hypercall lock anyhow (at least initially).
********/
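/*
 * Illustrative sketch only (not part of the build): the locking
 * discipline described above, using the shadow_lock()/shadow_unlock()
 * macros from asm/shadow.h. Any code that reads or writes shadow state
 * is assumed to bracket the work like this:
 *
 *     void example_shadow_op(struct domain *d)   // hypothetical helper
 *     {
 *         shadow_lock(d);
 *         // ... inspect gptes, update sptes, adjust refcounts ...
 *         shadow_unlock(d);
 *     }
 */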
static inline int
shadow_promote(struct domain *d, unsigned long gpfn, unsigned long gmfn,
unsigned long new_type)
{
struct pfn_info *page = pfn_to_page(gmfn);
int pinned = 0, okay = 1;
if ( page_out_of_sync(page) )
{
// Don't know how long ago this snapshot was taken.
// Can't trust it to be recent enough.
//
__shadow_sync_mfn(d, gmfn);
}
if ( !shadow_mode_refcounts(d) )
return 1;
if ( unlikely(page_is_page_table(page)) )
return 1;
FSH_LOG("%s: gpfn=%lx gmfn=%lx nt=%08lx", __func__, gpfn, gmfn, new_type);
if ( !shadow_remove_all_write_access(d, gpfn, gmfn) )
{
FSH_LOG("%s: couldn't find/remove all write accesses, gpfn=%lx gmfn=%lx",
__func__, gpfn, gmfn);
#if 1 || defined(LIVE_DANGEROUSLY)
set_bit(_PGC_page_table, &page->count_info);
return 1;
#endif
return 0;
}
// To convert this page for use as a page table, the writable count
// should now be zero. Test this by grabbing the page as a page table,
// and then immediately releasing. This will also deal with any
// necessary TLB flushing issues for us.
//
// The cruft here about pinning doesn't really work right. This
// needs rethinking/rewriting... Need to gracefully deal with the
// TLB flushes required when promoting a writable page, and also deal
// with any outstanding (external) writable refs to this page (by
// refusing to promote it). The pinning headache complicates this
// code -- it would all get much simpler if we stopped using
// shadow_lock() and moved the shadow code to BIGLOCK().
//
if ( unlikely(!get_page(page, d)) )
BUG(); // XXX -- needs more thought for a graceful failure
if ( unlikely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) )
{
pinned = 1;
put_page_and_type(page);
}
if ( get_page_type(page, PGT_base_page_table) )
{
set_bit(_PGC_page_table, &page->count_info);
put_page_type(page);
}
else
{
printk("shadow_promote: get_page_type failed "
"dom%d gpfn=%lx gmfn=%lx t=%08lx\n",
d->domain_id, gpfn, gmfn, new_type);
okay = 0;
}
// Now put the type back to writable...
if ( unlikely(!get_page_type(page, PGT_writable_page)) )
BUG(); // XXX -- needs more thought for a graceful failure
if ( unlikely(pinned) )
{
if ( unlikely(test_and_set_bit(_PGT_pinned,
&page->u.inuse.type_info)) )
BUG(); // hmm... someone pinned this again?
}
else
put_page_and_type(page);
return okay;
}
/*
* Things in shadow mode that collect get_page() refs to the domain's
* pages are:
* - PGC_allocated takes a gen count, just like normal.
* - A writable page can be pinned (paravirtualized guests may consider
* these pages to be L1s or L2s, and don't know the difference).
* Pinning a page takes a gen count (but, for domains in shadow mode,
* it *doesn't* take a type count)
* - CR3 grabs a ref to whatever it points at, just like normal.
* - Shadow mode grabs an initial gen count for itself, as a placeholder
* for whatever references will exist.
* - Shadow PTEs that point to a page take a gen count, just like regular
* PTEs. However, they don't get a type count, as get_page_type() is
* hardwired to keep writable pages' counts at 1 for domains in shadow
* mode.
* - Whenever we shadow a page, the entry in the shadow hash grabs a
* general ref to the page.
* - Whenever a page goes out of sync, the out of sync entry grabs a
* general ref to the page.
*/
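/*
 * Worked example (illustrative only): under the rules above, a guest page
 * that is allocated, mapped by one shadow PTE, shadowed, and currently out
 * of sync holds one general ref each from PGC_allocated, the shadow PTE,
 * the shadow-hash entry, and the out-of-sync entry -- but none of these
 * takes a type count, so if the page is writable its type count stays at 1.
 */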
/*
* pfn_info fields for pages allocated as shadow pages:
*
* All 32 bits of count_info are a simple count of refs to this shadow
* from a) other shadow pages, b) current CR3s (aka v->arch.shadow_table),
* c) a pin, if it's a pinned shadow root pgtable, and d) outstanding
* out-of-sync references.
*
* u.inuse._domain is left NULL, to prevent accidentally allowing some
* random domain from gaining permissions to map this page.
*
* u.inuse.type_info & PGT_type_mask remembers what kind of page is being
* shadowed.
* u.inuse.type_info & PGT_mfn_mask holds the mfn of the page being shadowed.
* u.inuse.type_info & PGT_pinned indicates that an extra reference to this
* shadow currently exists because this is a shadow of a root page, and we
* don't want to let it disappear just because no CR3 is currently pointing
* at it.
*
* tlbflush_timestamp holds the min & max indices of valid page table
* entries within the shadow page.
*/
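/*
 * Illustrative sketch only (not part of the build): pulling the fields
 * described above back out of a shadow page's type_info. The helper
 * names are hypothetical; PGT_type_mask and PGT_mfn_mask are the masks
 * referred to in the comment above.
 *
 *     static inline unsigned long shadow_page_type(struct pfn_info *spage)
 *     {
 *         return spage->u.inuse.type_info & PGT_type_mask;   // page kind
 *     }
 *
 *     static inline unsigned long shadowed_gmfn(struct pfn_info *spage)
 *     {
 *         return spage->u.inuse.type_info & PGT_mfn_mask;    // shadowed mfn
 *     }
 */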
static inline unsigned long
alloc_shadow_page(struct domain *d,
unsigned long gpfn, unsigned long gmfn,
u32 psh_type)
{
struct pfn_info *page;
unsigned long smfn;
int pin = 0;