path: root/xen
author      iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-18 21:04:01 +0000
committer   iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-18 21:04:01 +0000
commit      dcc772f1c332e526018dce7b652fa4002669cc1d (patch)
tree        e24a79221e2cd9e1ed11be25b4d0597af35ca149 /xen
parent      9083a541fd10e818db2ca8766ec2fda6183b5ca6 (diff)
download    xen-dcc772f1c332e526018dce7b652fa4002669cc1d.tar.gz
            xen-dcc772f1c332e526018dce7b652fa4002669cc1d.tar.bz2
            xen-dcc772f1c332e526018dce7b652fa4002669cc1d.zip
bitkeeper revision 1.908 (40aa7a41_qzAxT0SBKFNAXKT6FF62g)
live migrate now works on SMP
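The change makes the log-dirty machinery safe to drive from dom0 while the target domain keeps running on another CPU, which is what live migration needs. A minimal sketch of one pre-copy round a migration tool might run against this interface follows; the do_dom0_op_shadow_control() wrapper and the exit condition are assumptions for illustration, and only DOM0_SHADOW_CONTROL_OP_CLEAN and the op/pages/dirty_bitmap fields come from the patch itself.

    /* Hedged sketch: one pre-copy round, assuming a hypothetical
     * do_dom0_op_shadow_control() hypercall wrapper. */
    static int precopy_round(int domid, unsigned long *bitmap,
                             unsigned long max_pages)
    {
        dom0_shadow_control_t sc;

        sc.op           = DOM0_SHADOW_CONTROL_OP_CLEAN;  /* copy & clear bitmap */
        sc.pages        = max_pages;
        sc.dirty_bitmap = bitmap;

        if ( do_dom0_op_shadow_control(domid, &sc) != 0 ) /* hypothetical wrapper */
            return -1;

        /* bitmap now holds the pages dirtied since the previous round;
           resend those pages, then decide whether another round is needed. */
        return 0;
    }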
Diffstat (limited to 'xen')
-rw-r--r--  xen/arch/i386/smp.c          2
-rw-r--r--  xen/arch/i386/traps.c        1
-rw-r--r--  xen/common/domain.c         10
-rw-r--r--  xen/common/shadow.c        160
-rw-r--r--  xen/include/xen/shadow.h   589
-rw-r--r--  xen/net/dev.c               12
6 files changed, 398 insertions, 376 deletions
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index 4989fc5085..363d61626e 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -212,7 +212,7 @@ static inline void send_IPI_allbutself(int vector)
*/
static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
-static volatile unsigned long flush_cpumask;
+volatile unsigned long flush_cpumask;
asmlinkage void smp_invalidate_interrupt(void)
{
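flush_cpumask is un-static'd here so that code outside smp.c (the shadow_fault() change later in this patch) can service a pending TLB-flush IPI while it is busy-waiting on a lock. Roughly, the consumer-side idiom this export enables, mirroring the shadow_fault() hunk below:

    /* Never spin blindly on a lock while a remote CPU may be waiting
     * for us to acknowledge a TLB flush.  'lock' stands in for
     * mm.shadow_lock in the real code. */
    extern volatile unsigned long flush_cpumask;

    while ( !spin_trylock(&lock) )
    {
        if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
            local_flush_tlb();      /* ack the IPI we would otherwise block */
        rep_nop();
    }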
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index 7250074420..d30c324804 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -167,7 +167,6 @@ void show_registers(struct pt_regs *regs)
regs->xfs & 0xffff, regs->xgs & 0xffff, ss);
show_stack(&regs->esp);
- show_trace(&regs->esp);
}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index f952ce577e..ee11f20fcd 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -526,6 +526,16 @@ unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
/* initialise to machine_to_phys_mapping table to likely pfn */
machine_to_phys_mapping[page-frame_table] = alloc_pfns;
+
+#ifndef NDEBUG
+ {
+ // initialise with magic marker if in DEBUG mode
+ void * a = map_domain_mem( (page-frame_table)<<PAGE_SHIFT );
+ memset( a, 0x80 | (char) p->domain, PAGE_SIZE );
+ unmap_domain_mem( a );
+ }
+#endif
+
}
p->tot_pages = nr_pages;
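The debug-only memset above stamps every freshly allocated domain page with 0x80 | domain id, so a page the guest (or the migration copy) never wrote is easy to spot in a dump. A hypothetical checker along these lines could be used while debugging; the helper name is an assumption, only the poison value comes from the hunk above.

    /* Hedged sketch: recognise the poison pattern in a mapped page. */
    static int page_still_poisoned(void *page_va, unsigned int domid)
    {
        unsigned char expect = 0x80 | (unsigned char)domid;
        unsigned char *p = page_va;
        int i;

        for ( i = 0; i < PAGE_SIZE; i++ )
            if ( p[i] != expect )
                return 0;
        return 1;   /* page was never overwritten since allocation */
    }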
diff --git a/xen/common/shadow.c b/xen/common/shadow.c
index 44945556e1..216c3deda1 100644
--- a/xen/common/shadow.c
+++ b/xen/common/shadow.c
@@ -28,7 +28,19 @@ hypercall lock anyhow (at least initially).
********/
-static spinlock_t cpu_stall_lock;
+
+/**
+
+FIXME:
+
+1. Flush needs to avoid blowing away the L2 page that another CPU may be using!
+
+fix using cpu_raise_softirq
+
+have a flag to count in, (after switching to init's PTs)
+spinlock, reload cr3_shadow, unlock
+
+**/
static inline void free_shadow_page( struct mm_struct *m,
struct pfn_info *pfn_info )
@@ -115,7 +127,7 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
{
- if ( spl1e[i] & _PAGE_RW )
+ if ( (spl1e[i] & _PAGE_PRESENT ) && (spl1e[i] & _PAGE_RW) )
{
work++;
spl1e[i] &= ~_PAGE_RW;
@@ -124,6 +136,8 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
unmap_domain_mem( spl1e );
}
}
+ break;
+
}
return work;
}
@@ -161,7 +175,6 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
void shadow_mode_init(void)
{
- spin_lock_init( &cpu_stall_lock );
}
int shadow_mode_enable( struct task_struct *p, unsigned int mode )
@@ -184,9 +197,9 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
// allocate space for first lot of extra nodes
m->shadow_ht_extras = kmalloc( sizeof(void*) +
- (shadow_ht_extra_size *
- sizeof(struct shadow_status)),
- GFP_KERNEL );
+ (shadow_ht_extra_size *
+ sizeof(struct shadow_status)),
+ GFP_KERNEL );
if( ! m->shadow_ht_extras )
goto nomem;
@@ -225,7 +238,7 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
__shadow_mk_pagetable( m );
return 0;
- nomem:
+nomem:
return -ENOMEM;
}
@@ -263,7 +276,7 @@ void shadow_mode_disable( struct task_struct *p )
}
static int shadow_mode_table_op( struct task_struct *p,
- dom0_shadow_control_t *sc )
+ dom0_shadow_control_t *sc )
{
unsigned int op = sc->op;
struct mm_struct *m = &p->mm;
@@ -273,6 +286,8 @@ static int shadow_mode_table_op( struct task_struct *p,
// tables right now. Calling flush on yourself would be really
// stupid.
+ ASSERT(spin_is_locked(&p->mm.shadow_lock));
+
if ( m == &current->mm )
{
printk("Don't try and flush your own page tables!\n");
@@ -291,48 +306,49 @@ static int shadow_mode_table_op( struct task_struct *p,
case DOM0_SHADOW_CONTROL_OP_CLEAN:
{
- int i,j,zero=1;
+ int i,j,zero=1;
- __scan_shadow_table( m, op );
+ __scan_shadow_table( m, op );
+ // __free_shadow_table( m );
- if( p->tot_pages > sc->pages ||
- !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
- {
- rc = -EINVAL;
- goto out;
- }
+ if( p->tot_pages > sc->pages ||
+ !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
- sc->pages = p->tot_pages;
+ sc->pages = p->tot_pages;
#define chunk (8*1024) // do this in 1KB chunks for L1 cache
- for(i=0;i<p->tot_pages;i+=chunk)
- {
- int bytes = (( ((p->tot_pages-i) > (chunk))?
- (chunk):(p->tot_pages-i) ) + 7) / 8;
+ for(i=0;i<p->tot_pages;i+=chunk)
+ {
+ int bytes = (( ((p->tot_pages-i) > (chunk))?
+ (chunk):(p->tot_pages-i) ) + 7) / 8;
- copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
- p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- bytes );
+ copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+ p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ bytes );
- for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
- {
- if( p->mm.shadow_dirty_bitmap[j] != 0 )
- zero = 0;
- }
-
- memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
- 0, bytes);
- }
+ for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
+ {
+ if( p->mm.shadow_dirty_bitmap[j] != 0 )
+ zero = 0;
+ }
- if (zero)
- {
- /* might as well stop the domain as an optimization. */
- if ( p->state != TASK_STOPPED )
- send_guest_virq(p, VIRQ_STOP);
- }
-
- break;
+ memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+ 0, bytes);
+ }
+
+ if (zero)
+ {
+ /* might as well stop the domain as an optimization. */
+ if ( p->state != TASK_STOPPED )
+ send_guest_virq(p, VIRQ_STOP);
+ }
+
+ break;
}
}
@@ -352,50 +368,10 @@ out:
int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
{
unsigned int cmd = sc->op;
- int rc = 0, cpu;
-
- // don't call if already shadowed...
-
- /* The following is pretty hideous because we don't have a way of
- synchronously pausing a domain. If it's assigned to the curernt CPU,
- we don't have to worry -- it can't possibly actually be running.
- If its on another CPU, for the moment, we do something really gross:
- we cause the other CPU to spin regardless of what domain it is running.
-
- I know this is really grim, but it only lasts a few 10's of
- microseconds. It needs fixing as soon as the last of the Linux-isms
- get removed from the task structure...
-
- Oh, and let's hope someone doesn't repin the CPU while we're here.
- Also, prey someone else doesn't do this in another domain.
- At least there's only one dom0 at the moment...
-
- */
+ int rc = 0;
-printk("XXX\n");
spin_lock(&p->mm.shadow_lock);
-printk("SMC irq=%d\n",local_irq_is_enabled());
- spin_lock( &cpu_stall_lock );
- cpu = p->processor;
-printk("got target cpu=%d this cpu=%d\n",cpu, current->processor );
- if ( cpu != current->processor )
- {
- static void cpu_stall(void * data)
- {
- if ( current->processor == (int) data )
- {
- printk("Stall cpu=%d is locked %d irq=%d\n",(int)data,spin_is_locked(&cpu_stall_lock),local_irq_is_enabled());
- spin_lock( &cpu_stall_lock );
- printk("release\n");
- spin_unlock( &cpu_stall_lock );
- }
- }
-printk("before\n");
- smp_call_function(cpu_stall, (void*)cpu, 1, 0); // don't wait!
-printk("after\n");
- }
-
if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
{
shadow_mode_disable(p);
@@ -412,18 +388,15 @@ printk("after\n");
}
else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
{
-printk("+");
rc = shadow_mode_table_op(p, sc);
-printk("=");
}
else
{
rc = -EINVAL;
}
- spin_unlock( &cpu_stall_lock );
-printk("SMC- %d\n",rc);
-
+ flush_tlb_cpu(p->processor);
+
spin_unlock(&p->mm.shadow_lock);
return rc;
@@ -549,8 +522,6 @@ int shadow_fault( unsigned long va, long error_code )
unsigned long gpte, spte;
struct mm_struct *m = &current->mm;
- // we know interrupts are always on entry to the page fault handler
-
SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
check_pagetable( current, current->mm.pagetable, "pre-sf" );
@@ -573,9 +544,18 @@ int shadow_fault( unsigned long va, long error_code )
return 0;
}
- spin_lock(&current->mm.shadow_lock);
// take the lock and reread gpte
+ while( unlikely(!spin_trylock(&current->mm.shadow_lock)) )
+ {
+ extern volatile unsigned long flush_cpumask;
+ if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
+ local_flush_tlb();
+ rep_nop();
+ }
+
+ ASSERT(spin_is_locked(&current->mm.shadow_lock));
+
if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
{
SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
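Taken together, the shadow.c changes replace the old cpu_stall_lock cross-call with a plain lock-plus-flush protocol. The control path now looks roughly like the condensed sketch below, and it is the reason shadow_fault() must use spin_trylock: the target CPU may be inside the fault handler wanting shadow_lock at the very moment dom0 holds it and waits for that CPU's TLB-flush acknowledgement.

    /* Condensed sketch of the dom0 control side after this patch
     * (see shadow_mode_control() above for the real flow). */
    spin_lock(&p->mm.shadow_lock);       /* serialise against shadow_fault()  */
    rc = shadow_mode_table_op(p, sc);    /* e.g. CLEAN: copy + clear bitmap   */
    flush_tlb_cpu(p->processor);         /* target CPU must ack via IPI, so   */
    spin_unlock(&p->mm.shadow_lock);     /* it cannot spin blindly on the lock */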
diff --git a/xen/include/xen/shadow.h b/xen/include/xen/shadow.h
index 587f9178bd..f1ce8b6689 100644
--- a/xen/include/xen/shadow.h
+++ b/xen/include/xen/shadow.h
@@ -27,14 +27,14 @@ extern void shadow_mode_init(void);
extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc );
extern int shadow_fault( unsigned long va, long error_code );
extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
- unsigned long *prev_spfn_ptr,
- l1_pgentry_t **prev_spl1e_ptr );
+ unsigned long *prev_spfn_ptr,
+ l1_pgentry_t **prev_spl1e_ptr );
extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
extern void unshadow_table( unsigned long gpfn, unsigned int type );
extern int shadow_mode_enable( struct task_struct *p, unsigned int mode );
extern void shadow_mode_disable( struct task_struct *p );
extern unsigned long shadow_l2_table(
- struct mm_struct *m, unsigned long gpfn );
+ struct mm_struct *m, unsigned long gpfn );
#define SHADOW_DEBUG 0
#define SHADOW_HASH_DEBUG 0
@@ -51,24 +51,24 @@ struct shadow_status {
#ifndef NDEBUG
#define SH_LOG(_f, _a...) \
- printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
+printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
#else
#define SH_LOG(_f, _a...)
#endif
#if SHADOW_DEBUG
#define SH_VLOG(_f, _a...) \
- printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
#else
#define SH_VLOG(_f, _a...)
#endif
#if 0
#define SH_VVLOG(_f, _a...) \
- printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
- current->domain , __LINE__ , ## _a )
+ printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
+ current->domain , __LINE__ , ## _a )
#else
#define SH_VVLOG(_f, _a...)
#endif
@@ -76,46 +76,57 @@ struct shadow_status {
/************************************************************************/
-static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
+ static inline void __mark_dirty( struct mm_struct *m, unsigned int mfn )
{
- unsigned int pfn;
+ unsigned int pfn;
- pfn = machine_to_phys_mapping[mfn];
+ ASSERT(spin_is_locked(&m->shadow_lock));
+
+ //printk("%08x %08lx\n", mfn, machine_to_phys_mapping[mfn] );
- /* We use values with the top bit set to mark MFNs that aren't
- really part of the domain's psuedo-physical memory map e.g.
- the shared info frame. Nothing to do here...
- */
- if ( unlikely(pfn & 0x80000000U) ) return;
+ pfn = machine_to_phys_mapping[mfn];
- ASSERT(m->shadow_dirty_bitmap);
- if( likely(pfn<m->shadow_dirty_bitmap_size) )
- {
- /* use setbit to be smp guest safe. Since the same page is likely to
- get marked dirty many times, examine the bit first before doing the
- expensive lock-prefixed opertion */
+ /* We use values with the top bit set to mark MFNs that aren't
+ really part of the domain's psuedo-physical memory map e.g.
+ the shared info frame. Nothing to do here...
+ */
+ if ( unlikely(pfn & 0x80000000U) ) return;
- if (! test_bit( pfn, m->shadow_dirty_bitmap ) )
- set_bit( pfn, m->shadow_dirty_bitmap );
- }
- else
- {
- extern void show_traceX(void);
- SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
- mfn, pfn, m->shadow_dirty_bitmap_size, m );
- SH_LOG("dom=%lld caf=%08x taf=%08x\n",
- frame_table[mfn].u.domain->domain,
- frame_table[mfn].count_and_flags,
- frame_table[mfn].type_and_flags );
- //show_traceX();
- }
+ ASSERT(m->shadow_dirty_bitmap);
+ if( likely(pfn<m->shadow_dirty_bitmap_size) )
+ {
+ /* These updates occur with mm.shadow_lock held */
+ __set_bit( pfn, m->shadow_dirty_bitmap );
+ }
+ else
+ {
+ extern void show_traceX(void);
+ SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
+ mfn, pfn, m->shadow_dirty_bitmap_size, m );
+ SH_LOG("dom=%lld caf=%08x taf=%08x\n",
+ frame_table[mfn].u.domain->domain,
+ frame_table[mfn].count_and_flags,
+ frame_table[mfn].type_and_flags );
+ //show_traceX();
+ }
}
+
+static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
+{
+ ASSERT(local_irq_is_enabled());
+ //if(spin_is_locked(&m->shadow_lock)) printk("+");
+ spin_lock(&m->shadow_lock);
+ __mark_dirty( m, mfn );
+ spin_unlock(&m->shadow_lock);
+}
+
+
/************************************************************************/
static inline void l1pte_write_fault( struct mm_struct *m,
- unsigned long *gpte_p, unsigned long *spte_p )
+ unsigned long *gpte_p, unsigned long *spte_p )
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
@@ -123,17 +134,17 @@ static inline void l1pte_write_fault( struct mm_struct *m,
switch( m->shadow_mode )
{
case SHM_test:
- spte = gpte;
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
- break;
+ spte = gpte;
+ gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+ spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+ break;
case SHM_logdirty:
- spte = gpte;
- gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
- spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
- mark_dirty( m, (gpte >> PAGE_SHIFT) );
- break;
+ spte = gpte;
+ gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+ spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+ __mark_dirty( m, (gpte >> PAGE_SHIFT) );
+ break;
}
*gpte_p = gpte;
@@ -141,7 +152,7 @@ static inline void l1pte_write_fault( struct mm_struct *m,
}
static inline void l1pte_read_fault( struct mm_struct *m,
- unsigned long *gpte_p, unsigned long *spte_p )
+ unsigned long *gpte_p, unsigned long *spte_p )
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
@@ -149,19 +160,19 @@ static inline void l1pte_read_fault( struct mm_struct *m,
switch( m->shadow_mode )
{
case SHM_test:
- spte = gpte;
- gpte |= _PAGE_ACCESSED;
- spte |= _PAGE_ACCESSED;
- if ( ! (gpte & _PAGE_DIRTY ) )
- spte &= ~ _PAGE_RW;
- break;
+ spte = gpte;
+ gpte |= _PAGE_ACCESSED;
+ spte |= _PAGE_ACCESSED;
+ if ( ! (gpte & _PAGE_DIRTY ) )
+ spte &= ~ _PAGE_RW;
+ break;
case SHM_logdirty:
- spte = gpte;
- gpte |= _PAGE_ACCESSED;
- spte |= _PAGE_ACCESSED;
- spte &= ~ _PAGE_RW;
- break;
+ spte = gpte;
+ gpte |= _PAGE_ACCESSED;
+ spte |= _PAGE_ACCESSED;
+ spte &= ~ _PAGE_RW;
+ break;
}
*gpte_p = gpte;
@@ -169,7 +180,7 @@ static inline void l1pte_read_fault( struct mm_struct *m,
}
static inline void l1pte_no_fault( struct mm_struct *m,
- unsigned long *gpte_p, unsigned long *spte_p )
+ unsigned long *gpte_p, unsigned long *spte_p )
{
unsigned long gpte = *gpte_p;
unsigned long spte = *spte_p;
@@ -177,26 +188,26 @@ static inline void l1pte_no_fault( struct mm_struct *m,
switch( m->shadow_mode )
{
case SHM_test:
- spte = 0;
- if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED) )
- {
- spte = gpte;
- if ( ! (gpte & _PAGE_DIRTY ) )
- spte &= ~ _PAGE_RW;
- }
- break;
+ spte = 0;
+ if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ (_PAGE_PRESENT|_PAGE_ACCESSED) )
+ {
+ spte = gpte;
+ if ( ! (gpte & _PAGE_DIRTY ) )
+ spte &= ~ _PAGE_RW;
+ }
+ break;
case SHM_logdirty:
- spte = 0;
- if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
- (_PAGE_PRESENT|_PAGE_ACCESSED) )
- {
- spte = gpte;
- spte &= ~ _PAGE_RW;
- }
-
- break;
+ spte = 0;
+ if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) ==
+ (_PAGE_PRESENT|_PAGE_ACCESSED) )
+ {
+ spte = gpte;
+ spte &= ~ _PAGE_RW;
+ }
+
+ break;
}
*gpte_p = gpte;
@@ -204,27 +215,27 @@ static inline void l1pte_no_fault( struct mm_struct *m,
}
static inline void l2pde_general( struct mm_struct *m,
- unsigned long *gpde_p, unsigned long *spde_p,
- unsigned long sl1pfn)
+ unsigned long *gpde_p, unsigned long *spde_p,
+ unsigned long sl1pfn)
{
unsigned long gpde = *gpde_p;
unsigned long spde = *spde_p;
- spde = 0;
+ spde = 0;
- if ( sl1pfn )
- {
- spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) |
- _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
- gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
-
- if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) )
- {
- // detect linear map, and keep pointing at guest
- SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
- spde = gpde & ~_PAGE_RW;
- }
+ if ( sl1pfn )
+ {
+ spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) |
+ _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
+ gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+
+ if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) )
+ {
+ // detect linear map, and keep pointing at guest
+ SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+ spde = gpde & ~_PAGE_RW;
}
+ }
*gpde_p = gpde;
*spde_p = spde;
@@ -237,45 +248,45 @@ static inline void l2pde_general( struct mm_struct *m,
#if SHADOW_HASH_DEBUG
static void shadow_audit(struct mm_struct *m, int print)
{
- int live=0, free=0, j=0, abs;
- struct shadow_status *a;
+ int live=0, free=0, j=0, abs;
+ struct shadow_status *a;
for(j=0;j<shadow_ht_buckets;j++)
{
a = &m->shadow_ht[j];
- if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);}
- ASSERT((a->pfn&0xf0000000)==0);
- ASSERT(a->pfn<0x00100000);
- a=a->next;
+ if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);}
+ ASSERT((a->pfn&0xf0000000)==0);
+ ASSERT(a->pfn<0x00100000);
+ a=a->next;
while(a && live<9999)
- {
- live++;
- if(a->pfn == 0 || a->spfn_and_flags == 0)
- {
- printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
- live, a->pfn, a->spfn_and_flags, a->next);
- BUG();
- }
- ASSERT(a->pfn);
- ASSERT((a->pfn&0xf0000000)==0);
- ASSERT(a->pfn<0x00100000);
- ASSERT(a->spfn_and_flags&PSH_pfn_mask);
- a=a->next;
- }
- ASSERT(live<9999);
+ {
+ live++;
+ if(a->pfn == 0 || a->spfn_and_flags == 0)
+ {
+ printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n",
+ live, a->pfn, a->spfn_and_flags, a->next);
+ BUG();
+ }
+ ASSERT(a->pfn);
+ ASSERT((a->pfn&0xf0000000)==0);
+ ASSERT(a->pfn<0x00100000);
+ ASSERT(a->spfn_and_flags&PSH_pfn_mask);
+ a=a->next;
}
+ ASSERT(live<9999);
+ }
a = m->shadow_ht_free;
while(a) { free++; a=a->next; }
if(print) printk("Xlive=%d free=%d\n",live,free);
- abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
- if( abs < -1 || abs > 1 )
- {
- printk("live=%d free=%d l1=%d l2=%d\n",live,free,
- perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
- BUG();
+ abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live;
+ if( abs < -1 || abs > 1 )
+ {
+ printk("live=%d free=%d l1=%d l2=%d\n",live,free,
+ perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) );
+ BUG();
}
}
@@ -287,56 +298,56 @@ static void shadow_audit(struct mm_struct *m, int print)
static inline struct shadow_status* hash_bucket( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
return &(m->shadow_ht[gpfn % shadow_ht_buckets]);
}
static inline unsigned long __shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
- struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn );
+ struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn );
b = B;
ob = NULL;
- SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b );
- shadow_audit(m,0); // if in debug mode
+ SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b );
+ shadow_audit(m,0); // if in debug mode
- do
+ do
+ {
+ if ( b->pfn == gpfn )
{
- if ( b->pfn == gpfn )
- {
- unsigned long t;
- struct shadow_status *x;
-
- // swap with head
- t=B->pfn; B->pfn=b->pfn; b->pfn=t;
- t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags;
- b->spfn_and_flags=t;
-
- if(ob)
- { // pull to front
- *ob=b->next;
- x=B->next;
- B->next=b;
- b->next=x;
- }
- return B->spfn_and_flags;
- }
+ unsigned long t;
+ struct shadow_status *x;
+
+ // swap with head
+ t=B->pfn; B->pfn=b->pfn; b->pfn=t;
+ t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags;
+ b->spfn_and_flags=t;
+
+ if(ob)
+ { // pull to front
+ *ob=b->next;
+ x=B->next;
+ B->next=b;
+ b->next=x;
+ }
+ return B->spfn_and_flags;
+ }
#if SHADOW_HASH_DEBUG
- else
- {
- if(b!=B)ASSERT(b->pfn);
- }
-#endif
- ob=&b->next;
- b=b->next;
+ else
+ {
+ if(b!=B)ASSERT(b->pfn);
}
- while (b);
+#endif
+ ob=&b->next;
+ b=b->next;
+ }
+ while (b);
- return 0;
+ return 0;
}
/* we can make this locking more fine grained e.g. per shadow page if it
@@ -344,112 +355,119 @@ ever becomes a problem, but since we need a spin lock on the hash table
anyway its probably not worth being too clever. */
static inline unsigned long get_shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
- unsigned long res;
-
- /* If we get here, we know that this domain is running in shadow mode.
- We also know that some sort of update has happened to the underlying
- page table page: either a PTE has been updated, or the page has
- changed type. If we're in log dirty mode, we should set the approrpiate
- bit in the dirty bitmap.
- NB: the VA update path doesn't use this so needs to be handled
- independnetly.
- */
-
- if( m->shadow_mode == SHM_logdirty )
- mark_dirty( m, gpfn );
+ unsigned long res;
+
+ /* If we get here, we know that this domain is running in shadow mode.
+ We also know that some sort of update has happened to the underlying
+ page table page: either a PTE has been updated, or the page has
+ changed type. If we're in log dirty mode, we should set the approrpiate
+ bit in the dirty bitmap.
+ NB: the VA update path doesn't use this so needs to be handled
+ independnetly.
+ */
+
+ ASSERT(local_irq_is_enabled());
+ //if(spin_is_locked(&m->shadow_lock)) printk("*");
+ spin_lock(&m->shadow_lock);
+
+ if( m->shadow_mode == SHM_logdirty )
+ __mark_dirty( m, gpfn );
- spin_lock(&m->shadow_lock);
- res = __shadow_status( m, gpfn );
- if (!res) spin_unlock(&m->shadow_lock);
- return res;
+ res = __shadow_status( m, gpfn );
+ if (!res) spin_unlock(&m->shadow_lock);
+ return res;
}
static inline void put_shadow_status( struct mm_struct *m )
{
- spin_unlock(&m->shadow_lock);
+ spin_unlock(&m->shadow_lock);
}
static inline void delete_shadow_status( struct mm_struct *m,
- unsigned int gpfn )
+ unsigned int gpfn )
{
- struct shadow_status *b, *B, **ob;
+ struct shadow_status *b, *B, **ob;
- B = b = hash_bucket( m, gpfn );
+ ASSERT(spin_is_locked(&m->shadow_lock));
- SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
- shadow_audit(m,0);
- ASSERT(gpfn);
+ B = b = hash_bucket( m, gpfn );
+
+ SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b );
+ shadow_audit(m,0);
+ ASSERT(gpfn);
- if( b->pfn == gpfn )
+ if( b->pfn == gpfn )
{
- if (b->next)
- {
- struct shadow_status *D=b->next;
- b->spfn_and_flags = b->next->spfn_and_flags;
- b->pfn = b->next->pfn;
-
- b->next = b->next->next;
- D->next = m->shadow_ht_free;
- D->pfn = 0;
- D->spfn_and_flags = 0;
- m->shadow_ht_free = D;
- }
- else
- {
- b->pfn = 0;
- b->spfn_and_flags = 0;
- }
+ if (b->next)
+ {
+ struct shadow_status *D=b->next;
+ b->spfn_and_flags = b->next->spfn_and_flags;
+ b->pfn = b->next->pfn;
+
+ b->next = b->next->next;
+ D->next = m->shadow_ht_free;
+ D->pfn = 0;
+ D->spfn_and_flags = 0;
+ m->shadow_ht_free = D;
+ }
+ else
+ {
+ b->pfn = 0;
+ b->spfn_and_flags = 0;
+ }
#if SHADOW_HASH_DEBUG
- if( __shadow_status(m,gpfn) ) BUG();
- shadow_audit(m,0);
+ if( __shadow_status(m,gpfn) ) BUG();
+ shadow_audit(m,0);
#endif
- return;
+ return;
}
- ob = &b->next;
- b=b->next;
+ ob = &b->next;
+ b=b->next;
- do
+ do
+ {
+ if ( b->pfn == gpfn )
{
- if ( b->pfn == gpfn )
- {
- b->pfn = 0;
- b->spfn_and_flags = 0;
+ b->pfn = 0;
+ b->spfn_and_flags = 0;
- // b is in the list
- *ob=b->next;
- b->next = m->shadow_ht_free;
- m->shadow_ht_free = b;
+ // b is in the list
+ *ob=b->next;
+ b->next = m->shadow_ht_free;
+ m->shadow_ht_free = b;
#if SHADOW_HASH_DEBUG
- if( __shadow_status(m,gpfn) ) BUG();
+ if( __shadow_status(m,gpfn) ) BUG();
#endif
- shadow_audit(m,0);
- return;
- }
-
- ob = &b->next;
- b=b->next;
+ shadow_audit(m,0);
+ return;
}
- while (b);
- // if we got here, it wasn't in the list
+ ob = &b->next;
+ b=b->next;
+ }
+ while (b);
+
+ // if we got here, it wasn't in the list
BUG();
}
static inline void set_shadow_status( struct mm_struct *m,
- unsigned int gpfn, unsigned long s )
+ unsigned int gpfn, unsigned long s )
{
- struct shadow_status *b, *B, *extra, **fptr;
+ struct shadow_status *b, *B, *extra, **fptr;
int i;
- B = b = hash_bucket( m, gpfn );
+ ASSERT(spin_is_locked(&m->shadow_lock));
+
+ B = b = hash_bucket( m, gpfn );
ASSERT(gpfn);
//ASSERT(s);
@@ -458,106 +476,107 @@ static inline void set_shadow_status( struct mm_struct *m,
shadow_audit(m,0);
- do
+ do
+ {
+ if ( b->pfn == gpfn )
{
- if ( b->pfn == gpfn )
- {
- b->spfn_and_flags = s;
- shadow_audit(m,0);
- return;
- }
-
- b=b->next;
+ b->spfn_and_flags = s;
+ shadow_audit(m,0);
+ return;
}
- while (b);
- // if we got here, this is an insert rather than update
+ b=b->next;
+ }
+ while (b);
+
+ // if we got here, this is an insert rather than update
ASSERT( s ); // deletes must have succeeded by here
if ( B->pfn == 0 )
- {
- // we can use this head
- ASSERT( B->next == 0 );
- B->pfn = gpfn;
- B->spfn_and_flags = s;
- shadow_audit(m,0);
- return;
- }
+ {
+ // we can use this head
+ ASSERT( B->next == 0 );
+ B->pfn = gpfn;
+ B->spfn_and_flags = s;
+ shadow_audit(m,0);
+ return;
+ }
if( unlikely(m->shadow_ht_free == NULL) )
{
- SH_LOG("allocate more shadow hashtable blocks");
+ SH_LOG("allocate more shadow hashtable blocks");
- // we need to allocate more space
- extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)), GFP_KERNEL );
+ // we need to allocate more space
+ extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)), GFP_KERNEL );
- if( ! extra ) BUG(); // should be more graceful here....
+ if( ! extra ) BUG(); // should be more graceful here....
- memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)) );
+ memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size *
+ sizeof(struct shadow_status)) );
- m->shadow_extras_count++;
+ m->shadow_extras_count++;
- // add extras to free list
- fptr = &m->shadow_ht_free;
- for ( i=0; i<shadow_ht_extra_size; i++ )
- {
- *fptr = &extra[i];
- fptr = &(extra[i].next);
- }
- *fptr = NULL;
+ // add extras to free list
+ fptr = &m->shadow_ht_free;
+ for ( i=0; i<shadow_ht_extra_size; i++ )
+ {
+ *fptr = &extra[i];
+ fptr = &(extra[i].next);
+ }
+ *fptr = NULL;
- *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) =
- m->shadow_ht_extras;
- m->shadow_ht_extras = extra;
+ *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) =
+ m->shadow_ht_extras;
+ m->shadow_ht_extras = extra;
}
- // should really put this in B to go right to front
- b = m->shadow_ht_free;
+ // should really put this in B to go right to front
+ b = m->shadow_ht_free;
m->shadow_ht_free = b->next;
b->spfn_and_flags = s;
- b->pfn = gpfn;
- b->next = B->next;
- B->next = b;
+ b->pfn = gpfn;
+ b->next = B->next;
+ B->next = b;
- shadow_audit(m,0);
+ shadow_audit(m,0);
- return;
+ return;
}
static inline void __shadow_mk_pagetable( struct mm_struct *mm )
{
- unsigned long gpfn, spfn=0;
+ unsigned long gpfn, spfn=0;
- gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
+ gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT;
- if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) )
- {
- spfn = shadow_l2_table(mm, gpfn );
- }
- mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
+ if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) )
+ {
+ spfn = shadow_l2_table(mm, gpfn );
+ }
+ mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT);
}
static inline void shadow_mk_pagetable( struct mm_struct *mm )
{
- SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
- pagetable_val(mm->pagetable), mm->shadow_mode );
+ SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+ pagetable_val(mm->pagetable), mm->shadow_mode );
- if ( unlikely(mm->shadow_mode) )
- {
+ if ( unlikely(mm->shadow_mode) )
+ {
+ ASSERT(local_irq_is_enabled());
spin_lock(&mm->shadow_lock);
- __shadow_mk_pagetable( mm );
+ __shadow_mk_pagetable( mm );
spin_unlock(&mm->shadow_lock);
- }
+ }
- SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
- pagetable_val(mm->pagetable), mm->shadow_mode,
- pagetable_val(mm->shadow_table) );
+ SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx",
+ pagetable_val(mm->pagetable), mm->shadow_mode,
+ pagetable_val(mm->shadow_table) );
}
@@ -570,3 +589,5 @@ extern int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s);
#endif /* XEN_SHADOW_H */
+
+
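The header now follows a consistent lock discipline: the double-underscore variants (__mark_dirty, __shadow_status, __shadow_mk_pagetable) assert that mm.shadow_lock is already held, the plain wrappers take it themselves, and get_shadow_status() returns with the lock still held whenever it finds a shadow, to be dropped by put_shadow_status(). A minimal usage sketch of that last pairing, with placeholder work in the middle:

    /* The lock is only held (and must only be released) when a shadow
     * was actually found. */
    unsigned long sstat = get_shadow_status(m, gpfn);
    if ( sstat )
    {
        /* ... update the shadow page named by sstat & PSH_pfn_mask ... */
        put_shadow_status(m);    /* releases mm.shadow_lock */
    }
    /* if sstat == 0, get_shadow_status() already dropped the lock */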
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 0252568131..909e586b53 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -2267,7 +2267,19 @@ long flush_bufs_for_vif(net_vif_t *vif)
/* if in shadow mode, mark the PTE as dirty */
if( p->mm.shadow_mode == SHM_logdirty )
+ {
mark_dirty( &p->mm, rx->pte_ptr>>PAGE_SHIFT );
+#if 0
+ mark_dirty( &p->mm, rx->buf_pfn ); // XXXXXXX debug
+
+ {
+ unsigned long * p = map_domain_mem( rx->buf_pfn<<PAGE_SHIFT );
+ p[2] = 0xdeadc001;
+ unmap_domain_mem(p);
+ }
+#endif
+
+ }
/* assume the shadow page table is about to be blown away,
and that its not worth marking the buffer as dirty */