author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-18 21:04:01 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-05-18 21:04:01 +0000
commit     dcc772f1c332e526018dce7b652fa4002669cc1d (patch)
tree       e24a79221e2cd9e1ed11be25b4d0597af35ca149 /xen
parent     9083a541fd10e818db2ca8766ec2fda6183b5ca6 (diff)
bitkeeper revision 1.908 (40aa7a41_qzAxT0SBKFNAXKT6FF62g)
live migrate now works on SMP
Diffstat (limited to 'xen')
-rw-r--r--  xen/arch/i386/smp.c        2
-rw-r--r--  xen/arch/i386/traps.c      1
-rw-r--r--  xen/common/domain.c       10
-rw-r--r--  xen/common/shadow.c      160
-rw-r--r--  xen/include/xen/shadow.h 589
-rw-r--r--  xen/net/dev.c             12
6 files changed, 398 insertions, 376 deletions
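Reviewer's note: the heart of this change is the DOM0_SHADOW_CONTROL_OP_CLEAN path in xen/common/shadow.c below. Each live-migration round copies the shadow dirty bitmap out to dom0 in 1KB chunks (8*1024 pages of bitmap at a time, to stay L1-cache friendly) and clears it behind itself; if a whole round comes back clean, the real code stops the domain with VIRQ_STOP since nothing is left to transfer. A minimal standalone sketch of that copy-and-clear round follows -- all names here are illustrative, and plain memcpy stands in for copy_to_user:

/* clean_round.c -- standalone sketch, not Xen code.  memcpy stands in
 * for copy_to_user(); the bitmap is a flat array of unsigned longs. */
#include <string.h>

#define CHUNK (8 * 1024)   /* pages per chunk, as in shadow.c */

/* Copy the dirty bitmap covering tot_pages pages into 'dest' and clear
 * it as we go.  Returns 1 if every bit was already clear, in which case
 * the caller can stop the domain (the real code sends VIRQ_STOP). */
static int clean_dirty_bitmap(unsigned long *bitmap, unsigned long *dest,
                              unsigned int tot_pages)
{
    int zero = 1;
    unsigned int i, j;

    for ( i = 0; i < tot_pages; i += CHUNK )
    {
        unsigned int pages = (tot_pages - i > CHUNK) ? CHUNK : (tot_pages - i);
        unsigned int bytes = (pages + 7) / 8;
        unsigned int off   = i / (8 * sizeof(unsigned long));  /* word index */

        memcpy(dest + off, bitmap + off, bytes);   /* copy_to_user() in Xen */

        for ( j = 0; zero && j < bytes / sizeof(unsigned long); j++ )
            if ( bitmap[off + j] != 0 )
                zero = 0;

        memset(bitmap + off, 0, bytes);            /* arm the next round */
    }
    return zero;
}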
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index 4989fc5085..363d61626e 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -212,7 +212,7 @@ static inline void send_IPI_allbutself(int vector)
  */
 
 static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
-static volatile unsigned long flush_cpumask;
+volatile unsigned long flush_cpumask;
 
 asmlinkage void smp_invalidate_interrupt(void)
 {
diff --git a/xen/arch/i386/traps.c b/xen/arch/i386/traps.c
index 7250074420..d30c324804 100644
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -167,7 +167,6 @@ void show_registers(struct pt_regs *regs)
            regs->xfs & 0xffff, regs->xgs & 0xffff, ss);
 
     show_stack(&regs->esp);
-    show_trace(&regs->esp);
 }
 
diff --git a/xen/common/domain.c b/xen/common/domain.c
index f952ce577e..ee11f20fcd 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -526,6 +526,16 @@ unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
 
         /* initialise to machine_to_phys_mapping table to likely pfn */
         machine_to_phys_mapping[page-frame_table] = alloc_pfns;
+
+#ifndef NDEBUG
+        {
+            // initialise with magic marker if in DEBUG mode
+            void * a = map_domain_mem( (page-frame_table)<<PAGE_SHIFT );
+            memset( a, 0x80 | (char) p->domain, PAGE_SIZE );
+            unmap_domain_mem( a );
+        }
+#endif
+
     }
 
     p->tot_pages = nr_pages;
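Reviewer's note: the domain.c hunk above poisons each freshly allocated domain page with 0x80 | domain-id in debug builds, which makes stale mappings and wrongly attributed pages easy to spot in a memory dump. A standalone sketch of the same idea -- hypothetical names, with malloc standing in for the frame allocator plus map_domain_mem:

/* poison.c -- standalone sketch of the NDEBUG page-poisoning idea.
 * malloc() stands in for the frame allocator + map_domain_mem(). */
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

static void *alloc_dom_page(unsigned int domain_id)
{
    void *page = malloc(PAGE_SIZE);

#ifndef NDEBUG
    /* top bit marks "poison", low bits record the owning domain */
    if ( page != NULL )
        memset(page, 0x80 | (domain_id & 0x7f), PAGE_SIZE);
#endif

    return page;
}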
diff --git a/xen/common/shadow.c b/xen/common/shadow.c
index 44945556e1..216c3deda1 100644
--- a/xen/common/shadow.c
+++ b/xen/common/shadow.c
@@ -28,7 +28,19 @@ hypercall lock anyhow (at least initially).
 
 ********/
 
-static spinlock_t cpu_stall_lock;
+
+/**
+
+FIXME:
+
+1. Flush needs to avoid blowing away the L2 page that another CPU may be using!
+
+fix using cpu_raise_softirq
+
+have a flag to count in, (after switching to init's PTs)
+spinlock, reload cr3_shadow, unlock
+
+**/
 
 static inline void free_shadow_page( struct mm_struct *m,
                                      struct pfn_info *pfn_info )
@@ -115,7 +127,7 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
 
             for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
             {
-                if ( spl1e[i] & _PAGE_RW )
+                if ( (spl1e[i] & _PAGE_PRESENT ) && (spl1e[i] & _PAGE_RW) )
                 {
                     work++;
                     spl1e[i] &= ~_PAGE_RW;
@@ -124,6 +136,8 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
                 unmap_domain_mem( spl1e );
             }
         }
+        break;
+    }
 
     return work;
 }
@@ -161,7 +175,6 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
 
 void shadow_mode_init(void)
 {
-    spin_lock_init( &cpu_stall_lock );
 }
 
 int shadow_mode_enable( struct task_struct *p, unsigned int mode )
@@ -184,9 +197,9 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
 
     // allocate space for first lot of extra nodes
     m->shadow_ht_extras = kmalloc( sizeof(void*) +
-                                  (shadow_ht_extra_size *
-                                  sizeof(struct shadow_status)),
-                                  GFP_KERNEL );
+                                   (shadow_ht_extra_size *
+                                    sizeof(struct shadow_status)),
+                                   GFP_KERNEL );
 
     if( ! m->shadow_ht_extras )
         goto nomem;
@@ -225,7 +238,7 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
     __shadow_mk_pagetable( m );
     return 0;
 
- nomem:
+nomem:
     return -ENOMEM;
 }
 
@@ -263,7 +276,7 @@ void shadow_mode_disable( struct task_struct *p )
 }
 
 static int shadow_mode_table_op( struct task_struct *p,
-                                dom0_shadow_control_t *sc )
+                                 dom0_shadow_control_t *sc )
 {
     unsigned int op = sc->op;
     struct mm_struct *m = &p->mm;
@@ -273,6 +286,8 @@ static int shadow_mode_table_op( struct task_struct *p,
     // tables right now. Calling flush on yourself would be really
     // stupid.
 
+    ASSERT(spin_is_locked(&p->mm.shadow_lock));
+
     if ( m == &current->mm )
     {
         printk("Don't try and flush your own page tables!\n");
@@ -291,48 +306,49 @@ static int shadow_mode_table_op( struct task_struct *p,
 
     case DOM0_SHADOW_CONTROL_OP_CLEAN:
     {
-       int i,j,zero=1;
-
-       __scan_shadow_table( m, op );
+        int i,j,zero=1;
+
+        __scan_shadow_table( m, op );
+        // __free_shadow_table( m );
 
-       if( p->tot_pages > sc->pages ||
-           !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
-       {
-           rc = -EINVAL;
-           goto out;
-       }
+        if( p->tot_pages > sc->pages ||
+            !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
+        {
+            rc = -EINVAL;
+            goto out;
+        }
 
-       sc->pages = p->tot_pages;
+        sc->pages = p->tot_pages;
 
 #define chunk (8*1024) // do this in 1KB chunks for L1 cache
 
-       for(i=0;i<p->tot_pages;i+=chunk)
-       {
-           int bytes = (( ((p->tot_pages-i) > (chunk))?
-                          (chunk):(p->tot_pages-i) ) + 7) / 8;
+        for(i=0;i<p->tot_pages;i+=chunk)
+        {
+            int bytes = (( ((p->tot_pages-i) > (chunk))?
+                           (chunk):(p->tot_pages-i) ) + 7) / 8;
 
-           copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
-                         p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
-                         bytes );
+            copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                          p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+                          bytes );
 
-           for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
-           {
-               if( p->mm.shadow_dirty_bitmap[j] != 0 )
-                   zero = 0;
-           }
-
-           memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
-                   0, bytes);
-       }
+            for(j=0; zero && j<bytes/sizeof(unsigned long);j++)
+            {
+                if( p->mm.shadow_dirty_bitmap[j] != 0 )
+                    zero = 0;
+            }
 
-       if (zero)
-       {
-           /* might as well stop the domain as an optimization. */
-           if ( p->state != TASK_STOPPED )
-               send_guest_virq(p, VIRQ_STOP);
-       }
-
-       break;
+            memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+                    0, bytes);
+        }
+
+        if (zero)
+        {
+            /* might as well stop the domain as an optimization. */
+            if ( p->state != TASK_STOPPED )
+                send_guest_virq(p, VIRQ_STOP);
+        }
+
+        break;
     }
     }
 
@@ -352,50 +368,10 @@ out:
 
 int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
 {
     unsigned int cmd = sc->op;
-    int rc = 0, cpu;
-
-    // don't call if already shadowed...
-
-    /* The following is pretty hideous because we don't have a way of
-       synchronously pausing a domain. If it's assigned to the curernt CPU,
-       we don't have to worry -- it can't possibly actually be running.
-       If its on another CPU, for the moment, we do something really gross:
-       we cause the other CPU to spin regardless of what domain it is running.
-
-       I know this is really grim, but it only lasts a few 10's of
-       microseconds. It needs fixing as soon as the last of the Linux-isms
-       get removed from the task structure...
-
-       Oh, and let's hope someone doesn't repin the CPU while we're here.
-       Also, prey someone else doesn't do this in another domain.
-       At least there's only one dom0 at the moment...
-
-    */
+    int rc = 0;
 
-printk("XXX\n");
     spin_lock(&p->mm.shadow_lock);
-printk("SMC irq=%d\n",local_irq_is_enabled());
-    spin_lock( &cpu_stall_lock );
-    cpu = p->processor;
-printk("got target cpu=%d this cpu=%d\n",cpu, current->processor );
-    if ( cpu != current->processor )
-    {
-       static void cpu_stall(void * data)
-       {
-           if ( current->processor == (int) data )
-           {
-               printk("Stall cpu=%d is locked %d irq=%d\n",(int)data,spin_is_locked(&cpu_stall_lock),local_irq_is_enabled());
-               spin_lock( &cpu_stall_lock );
-               printk("release\n");
-               spin_unlock( &cpu_stall_lock );
-           }
-       }
-printk("before\n");
-       smp_call_function(cpu_stall, (void*)cpu, 1, 0); // don't wait!
-printk("after\n");
-    }
-
     if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
     {
         shadow_mode_disable(p);
@@ -412,18 +388,15 @@ printk("after\n");
     }
     else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH &&
               cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
     {
-printk("+");
         rc = shadow_mode_table_op(p, sc);
-printk("=");
     }
     else
     {
         rc = -EINVAL;
     }
 
-    spin_unlock( &cpu_stall_lock );
-printk("SMC- %d\n",rc);
-
+    flush_tlb_cpu(p->processor);
+
     spin_unlock(&p->mm.shadow_lock);
 
     return rc;
@@ -549,8 +522,6 @@ int shadow_fault( unsigned long va, long error_code )
     unsigned long gpte, spte;
     struct mm_struct *m = &current->mm;
 
-    // we know interrupts are always on entry to the page fault handler
-
     SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
 
     check_pagetable( current, current->mm.pagetable, "pre-sf" );
@@ -573,9 +544,18 @@ int shadow_fault( unsigned long va, long error_code )
         return 0;
     }
 
-    spin_lock(&current->mm.shadow_lock); // take the lock and reread gpte
+    while( unlikely(!spin_trylock(&current->mm.shadow_lock)) )
+    {
+        extern volatile unsigned long flush_cpumask;
+        if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
+            local_flush_tlb();
+        rep_nop();
+    }
+
+    ASSERT(spin_is_locked(&current->mm.shadow_lock));
+
     if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
     {
         SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
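Reviewer's note: the shadow.c changes above are the crux of the SMP fix. shadow_fault() used to take the shadow lock with a blocking spin_lock(), so a CPU spinning there with a TLB-flush IPI pending could deadlock against the lock holder waiting in the flush path. The new loop spins with spin_trylock() and services any pending flush request via the newly exported flush_cpumask on every iteration. A standalone sketch of the pattern, using C11 atomics in place of Xen's spinlocks and locked bit operations -- all names here are illustrative:

/* trylock_flush.c -- standalone sketch of the shadow_fault() locking
 * pattern; C11 atomics stand in for Xen's spinlocks and bitops. */
#include <stdatomic.h>

static atomic_flag  shadow_lock = ATOMIC_FLAG_INIT;
static atomic_ulong flush_cpumask;       /* bit n set => CPU n must flush */

static void local_flush_tlb(void) { /* would flush this CPU's TLB */ }
static int  smp_processor_id(void) { return 0; /* this CPU's id */ }

static void acquire_shadow_lock(void)
{
    /* Never block outright: if our flush bit gets set while we spin, the
     * lock holder may be waiting for us to acknowledge it, so clear the
     * bit and flush before trying again. */
    while ( atomic_flag_test_and_set_explicit(&shadow_lock,
                                              memory_order_acquire) )
    {
        unsigned long me = 1UL << smp_processor_id();

        if ( atomic_fetch_and(&flush_cpumask, ~me) & me )
            local_flush_tlb();   /* test_and_clear_bit() + flush in Xen */

        /* rep_nop()/cpu_relax() would go here */
    }
}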
-printk("after\n"); - } - if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF ) { shadow_mode_disable(p); @@ -412,18 +388,15 @@ printk("after\n"); } else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN ) { -printk("+"); rc = shadow_mode_table_op(p, sc); -printk("="); } else { rc = -EINVAL; } - spin_unlock( &cpu_stall_lock ); -printk("SMC- %d\n",rc); - + flush_tlb_cpu(p->processor); + spin_unlock(&p->mm.shadow_lock); return rc; @@ -549,8 +522,6 @@ int shadow_fault( unsigned long va, long error_code ) unsigned long gpte, spte; struct mm_struct *m = ¤t->mm; - // we know interrupts are always on entry to the page fault handler - SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code ); check_pagetable( current, current->mm.pagetable, "pre-sf" ); @@ -573,9 +544,18 @@ int shadow_fault( unsigned long va, long error_code ) return 0; } - spin_lock(¤t->mm.shadow_lock); // take the lock and reread gpte + while( unlikely(!spin_trylock(¤t->mm.shadow_lock)) ) + { + extern volatile unsigned long flush_cpumask; + if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) ) + local_flush_tlb(); + rep_nop(); + } + + ASSERT(spin_is_locked(¤t->mm.shadow_lock)); + if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) { SH_VVLOG("shadow_fault - EXIT: read gpte faulted" ); diff --git a/xen/include/xen/shadow.h b/xen/include/xen/shadow.h index 587f9178bd..f1ce8b6689 100644 --- a/xen/include/xen/shadow.h +++ b/xen/include/xen/shadow.h @@ -27,14 +27,14 @@ extern void shadow_mode_init(void); extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc ); extern int shadow_fault( unsigned long va, long error_code ); extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, - unsigned long *prev_spfn_ptr, - l1_pgentry_t **prev_spl1e_ptr ); + unsigned long *prev_spfn_ptr, + l1_pgentry_t **prev_spl1e_ptr ); extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte ); extern void unshadow_table( unsigned long gpfn, unsigned int type ); extern int shadow_mode_enable( struct task_struct *p, unsigned int mode ); extern void shadow_mode_disable( struct task_struct *p ); extern unsigned long shadow_l2_table( - struct mm_struct *m, unsigned long gpfn ); + struct mm_struct *m, unsigned long gpfn ); #define SHADOW_DEBUG 0 #define SHADOW_HASH_DEBUG 0 @@ -51,24 +51,24 @@ struct shadow_status { #ifndef NDEBUG #define SH_LOG(_f, _a...) \ - printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \ - current->domain , __LINE__ , ## _a ) +printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \ + current->domain , __LINE__ , ## _a ) #else #define SH_LOG(_f, _a...) #endif #if SHADOW_DEBUG #define SH_VLOG(_f, _a...) \ - printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \ - current->domain , __LINE__ , ## _a ) + printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \ + current->domain , __LINE__ , ## _a ) #else #define SH_VLOG(_f, _a...) #endif #if 0 #define SH_VVLOG(_f, _a...) \ - printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \ - current->domain , __LINE__ , ## _a ) + printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \ + current->domain , __LINE__ , ## _a ) #else #define SH_VVLOG(_f, _a...) 
#endif @@ -76,46 +76,57 @@ struct shadow_status { /************************************************************************/ -static inline void mark_dirty( struct mm_struct *m, unsigned int mfn ) + static inline void __mark_dirty( struct mm_struct *m, unsigned int mfn ) { - unsigned int pfn; + unsigned int pfn; - pfn = machine_to_phys_mapping[mfn]; + ASSERT(spin_is_locked(&m->shadow_lock)); + + //printk("%08x %08lx\n", mfn, machine_to_phys_mapping[mfn] ); - /* We use values with the top bit set to mark MFNs that aren't - really part of the domain's psuedo-physical memory map e.g. - the shared info frame. Nothing to do here... - */ - if ( unlikely(pfn & 0x80000000U) ) return; + pfn = machine_to_phys_mapping[mfn]; - ASSERT(m->shadow_dirty_bitmap); - if( likely(pfn<m->shadow_dirty_bitmap_size) ) - { - /* use setbit to be smp guest safe. Since the same page is likely to - get marked dirty many times, examine the bit first before doing the - expensive lock-prefixed opertion */ + /* We use values with the top bit set to mark MFNs that aren't + really part of the domain's psuedo-physical memory map e.g. + the shared info frame. Nothing to do here... + */ + if ( unlikely(pfn & 0x80000000U) ) return; - if (! test_bit( pfn, m->shadow_dirty_bitmap ) ) - set_bit( pfn, m->shadow_dirty_bitmap ); - } - else - { - extern void show_traceX(void); - SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)", - mfn, pfn, m->shadow_dirty_bitmap_size, m ); - SH_LOG("dom=%lld caf=%08x taf=%08x\n", - frame_table[mfn].u.domain->domain, - frame_table[mfn].count_and_flags, - frame_table[mfn].type_and_flags ); - //show_traceX(); - } + ASSERT(m->shadow_dirty_bitmap); + if( likely(pfn<m->shadow_dirty_bitmap_size) ) + { + /* These updates occur with mm.shadow_lock held */ + __set_bit( pfn, m->shadow_dirty_bitmap ); + } + else + { + extern void show_traceX(void); + SH_LOG("mark_dirty OOR! 
mfn=%x pfn=%x max=%x (mm %p)", + mfn, pfn, m->shadow_dirty_bitmap_size, m ); + SH_LOG("dom=%lld caf=%08x taf=%08x\n", + frame_table[mfn].u.domain->domain, + frame_table[mfn].count_and_flags, + frame_table[mfn].type_and_flags ); + //show_traceX(); + } } + +static inline void mark_dirty( struct mm_struct *m, unsigned int mfn ) +{ + ASSERT(local_irq_is_enabled()); + //if(spin_is_locked(&m->shadow_lock)) printk("+"); + spin_lock(&m->shadow_lock); + __mark_dirty( m, mfn ); + spin_unlock(&m->shadow_lock); +} + + /************************************************************************/ static inline void l1pte_write_fault( struct mm_struct *m, - unsigned long *gpte_p, unsigned long *spte_p ) + unsigned long *gpte_p, unsigned long *spte_p ) { unsigned long gpte = *gpte_p; unsigned long spte = *spte_p; @@ -123,17 +134,17 @@ static inline void l1pte_write_fault( struct mm_struct *m, switch( m->shadow_mode ) { case SHM_test: - spte = gpte; - gpte |= _PAGE_DIRTY | _PAGE_ACCESSED; - spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; - break; + spte = gpte; + gpte |= _PAGE_DIRTY | _PAGE_ACCESSED; + spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; + break; case SHM_logdirty: - spte = gpte; - gpte |= _PAGE_DIRTY | _PAGE_ACCESSED; - spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; - mark_dirty( m, (gpte >> PAGE_SHIFT) ); - break; + spte = gpte; + gpte |= _PAGE_DIRTY | _PAGE_ACCESSED; + spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; + __mark_dirty( m, (gpte >> PAGE_SHIFT) ); + break; } *gpte_p = gpte; @@ -141,7 +152,7 @@ static inline void l1pte_write_fault( struct mm_struct *m, } static inline void l1pte_read_fault( struct mm_struct *m, - unsigned long *gpte_p, unsigned long *spte_p ) + unsigned long *gpte_p, unsigned long *spte_p ) { unsigned long gpte = *gpte_p; unsigned long spte = *spte_p; @@ -149,19 +160,19 @@ static inline void l1pte_read_fault( struct mm_struct *m, switch( m->shadow_mode ) { case SHM_test: - spte = gpte; - gpte |= _PAGE_ACCESSED; - spte |= _PAGE_ACCESSED; - if ( ! (gpte & _PAGE_DIRTY ) ) - spte &= ~ _PAGE_RW; - break; + spte = gpte; + gpte |= _PAGE_ACCESSED; + spte |= _PAGE_ACCESSED; + if ( ! (gpte & _PAGE_DIRTY ) ) + spte &= ~ _PAGE_RW; + break; case SHM_logdirty: - spte = gpte; - gpte |= _PAGE_ACCESSED; - spte |= _PAGE_ACCESSED; - spte &= ~ _PAGE_RW; - break; + spte = gpte; + gpte |= _PAGE_ACCESSED; + spte |= _PAGE_ACCESSED; + spte &= ~ _PAGE_RW; + break; } *gpte_p = gpte; @@ -169,7 +180,7 @@ static inline void l1pte_read_fault( struct mm_struct *m, } static inline void l1pte_no_fault( struct mm_struct *m, - unsigned long *gpte_p, unsigned long *spte_p ) + unsigned long *gpte_p, unsigned long *spte_p ) { unsigned long gpte = *gpte_p; unsigned long spte = *spte_p; @@ -177,26 +188,26 @@ static inline void l1pte_no_fault( struct mm_struct *m, switch( m->shadow_mode ) { case SHM_test: - spte = 0; - if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == - (_PAGE_PRESENT|_PAGE_ACCESSED) ) - { - spte = gpte; - if ( ! (gpte & _PAGE_DIRTY ) ) - spte &= ~ _PAGE_RW; - } - break; + spte = 0; + if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == + (_PAGE_PRESENT|_PAGE_ACCESSED) ) + { + spte = gpte; + if ( ! 
(gpte & _PAGE_DIRTY ) ) + spte &= ~ _PAGE_RW; + } + break; case SHM_logdirty: - spte = 0; - if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == - (_PAGE_PRESENT|_PAGE_ACCESSED) ) - { - spte = gpte; - spte &= ~ _PAGE_RW; - } - - break; + spte = 0; + if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == + (_PAGE_PRESENT|_PAGE_ACCESSED) ) + { + spte = gpte; + spte &= ~ _PAGE_RW; + } + + break; } *gpte_p = gpte; @@ -204,27 +215,27 @@ static inline void l1pte_no_fault( struct mm_struct *m, } static inline void l2pde_general( struct mm_struct *m, - unsigned long *gpde_p, unsigned long *spde_p, - unsigned long sl1pfn) + unsigned long *gpde_p, unsigned long *spde_p, + unsigned long sl1pfn) { unsigned long gpde = *gpde_p; unsigned long spde = *spde_p; - spde = 0; + spde = 0; - if ( sl1pfn ) - { - spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | - _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY; - gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY; - - if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) ) - { - // detect linear map, and keep pointing at guest - SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn); - spde = gpde & ~_PAGE_RW; - } + if ( sl1pfn ) + { + spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) | + _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY; + gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY; + + if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gpde & PAGE_MASK) ) ) + { + // detect linear map, and keep pointing at guest + SH_VLOG("4c: linear mapping ( %08lx )",sl1pfn); + spde = gpde & ~_PAGE_RW; } + } *gpde_p = gpde; *spde_p = spde; @@ -237,45 +248,45 @@ static inline void l2pde_general( struct mm_struct *m, #if SHADOW_HASH_DEBUG static void shadow_audit(struct mm_struct *m, int print) { - int live=0, free=0, j=0, abs; - struct shadow_status *a; + int live=0, free=0, j=0, abs; + struct shadow_status *a; for(j=0;j<shadow_ht_buckets;j++) { a = &m->shadow_ht[j]; - if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);} - ASSERT((a->pfn&0xf0000000)==0); - ASSERT(a->pfn<0x00100000); - a=a->next; + if(a->pfn){live++; ASSERT(a->spfn_and_flags&PSH_pfn_mask);} + ASSERT((a->pfn&0xf0000000)==0); + ASSERT(a->pfn<0x00100000); + a=a->next; while(a && live<9999) - { - live++; - if(a->pfn == 0 || a->spfn_and_flags == 0) - { - printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n", - live, a->pfn, a->spfn_and_flags, a->next); - BUG(); - } - ASSERT(a->pfn); - ASSERT((a->pfn&0xf0000000)==0); - ASSERT(a->pfn<0x00100000); - ASSERT(a->spfn_and_flags&PSH_pfn_mask); - a=a->next; - } - ASSERT(live<9999); + { + live++; + if(a->pfn == 0 || a->spfn_and_flags == 0) + { + printk("XXX live=%d pfn=%08lx sp=%08lx next=%p\n", + live, a->pfn, a->spfn_and_flags, a->next); + BUG(); + } + ASSERT(a->pfn); + ASSERT((a->pfn&0xf0000000)==0); + ASSERT(a->pfn<0x00100000); + ASSERT(a->spfn_and_flags&PSH_pfn_mask); + a=a->next; } + ASSERT(live<9999); + } a = m->shadow_ht_free; while(a) { free++; a=a->next; } if(print) printk("Xlive=%d free=%d\n",live,free); - abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live; - if( abs < -1 || abs > 1 ) - { - printk("live=%d free=%d l1=%d l2=%d\n",live,free, - perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) ); - BUG(); + abs=(perfc_value(shadow_l1_pages)+perfc_value(shadow_l2_pages))-live; + if( abs < -1 || abs > 1 ) + { + printk("live=%d free=%d l1=%d l2=%d\n",live,free, + perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages) ); + BUG(); } } @@ -287,56 +298,56 @@ static void shadow_audit(struct mm_struct *m, int print) static inline struct shadow_status* hash_bucket( struct mm_struct *m, - 
unsigned int gpfn ) + unsigned int gpfn ) { return &(m->shadow_ht[gpfn % shadow_ht_buckets]); } static inline unsigned long __shadow_status( struct mm_struct *m, - unsigned int gpfn ) + unsigned int gpfn ) { - struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn ); + struct shadow_status **ob, *b, *B = hash_bucket( m, gpfn ); b = B; ob = NULL; - SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b ); - shadow_audit(m,0); // if in debug mode + SH_VVLOG("lookup gpfn=%08x bucket=%p", gpfn, b ); + shadow_audit(m,0); // if in debug mode - do + do + { + if ( b->pfn == gpfn ) { - if ( b->pfn == gpfn ) - { - unsigned long t; - struct shadow_status *x; - - // swap with head - t=B->pfn; B->pfn=b->pfn; b->pfn=t; - t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags; - b->spfn_and_flags=t; - - if(ob) - { // pull to front - *ob=b->next; - x=B->next; - B->next=b; - b->next=x; - } - return B->spfn_and_flags; - } + unsigned long t; + struct shadow_status *x; + + // swap with head + t=B->pfn; B->pfn=b->pfn; b->pfn=t; + t=B->spfn_and_flags; B->spfn_and_flags=b->spfn_and_flags; + b->spfn_and_flags=t; + + if(ob) + { // pull to front + *ob=b->next; + x=B->next; + B->next=b; + b->next=x; + } + return B->spfn_and_flags; + } #if SHADOW_HASH_DEBUG - else - { - if(b!=B)ASSERT(b->pfn); - } -#endif - ob=&b->next; - b=b->next; + else + { + if(b!=B)ASSERT(b->pfn); } - while (b); +#endif + ob=&b->next; + b=b->next; + } + while (b); - return 0; + return 0; } /* we can make this locking more fine grained e.g. per shadow page if it @@ -344,112 +355,119 @@ ever becomes a problem, but since we need a spin lock on the hash table anyway its probably not worth being too clever. */ static inline unsigned long get_shadow_status( struct mm_struct *m, - unsigned int gpfn ) + unsigned int gpfn ) { - unsigned long res; - - /* If we get here, we know that this domain is running in shadow mode. - We also know that some sort of update has happened to the underlying - page table page: either a PTE has been updated, or the page has - changed type. If we're in log dirty mode, we should set the approrpiate - bit in the dirty bitmap. - NB: the VA update path doesn't use this so needs to be handled - independnetly. - */ - - if( m->shadow_mode == SHM_logdirty ) - mark_dirty( m, gpfn ); + unsigned long res; + + /* If we get here, we know that this domain is running in shadow mode. + We also know that some sort of update has happened to the underlying + page table page: either a PTE has been updated, or the page has + changed type. If we're in log dirty mode, we should set the approrpiate + bit in the dirty bitmap. + NB: the VA update path doesn't use this so needs to be handled + independnetly. 
+ */ + + ASSERT(local_irq_is_enabled()); + //if(spin_is_locked(&m->shadow_lock)) printk("*"); + spin_lock(&m->shadow_lock); + + if( m->shadow_mode == SHM_logdirty ) + __mark_dirty( m, gpfn ); - spin_lock(&m->shadow_lock); - res = __shadow_status( m, gpfn ); - if (!res) spin_unlock(&m->shadow_lock); - return res; + res = __shadow_status( m, gpfn ); + if (!res) spin_unlock(&m->shadow_lock); + return res; } static inline void put_shadow_status( struct mm_struct *m ) { - spin_unlock(&m->shadow_lock); + spin_unlock(&m->shadow_lock); } static inline void delete_shadow_status( struct mm_struct *m, - unsigned int gpfn ) + unsigned int gpfn ) { - struct shadow_status *b, *B, **ob; + struct shadow_status *b, *B, **ob; - B = b = hash_bucket( m, gpfn ); + ASSERT(spin_is_locked(&m->shadow_lock)); - SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b ); - shadow_audit(m,0); - ASSERT(gpfn); + B = b = hash_bucket( m, gpfn ); + + SH_VVLOG("delete gpfn=%08x bucket=%p", gpfn, b ); + shadow_audit(m,0); + ASSERT(gpfn); - if( b->pfn == gpfn ) + if( b->pfn == gpfn ) { - if (b->next) - { - struct shadow_status *D=b->next; - b->spfn_and_flags = b->next->spfn_and_flags; - b->pfn = b->next->pfn; - - b->next = b->next->next; - D->next = m->shadow_ht_free; - D->pfn = 0; - D->spfn_and_flags = 0; - m->shadow_ht_free = D; - } - else - { - b->pfn = 0; - b->spfn_and_flags = 0; - } + if (b->next) + { + struct shadow_status *D=b->next; + b->spfn_and_flags = b->next->spfn_and_flags; + b->pfn = b->next->pfn; + + b->next = b->next->next; + D->next = m->shadow_ht_free; + D->pfn = 0; + D->spfn_and_flags = 0; + m->shadow_ht_free = D; + } + else + { + b->pfn = 0; + b->spfn_and_flags = 0; + } #if SHADOW_HASH_DEBUG - if( __shadow_status(m,gpfn) ) BUG(); - shadow_audit(m,0); + if( __shadow_status(m,gpfn) ) BUG(); + shadow_audit(m,0); #endif - return; + return; } - ob = &b->next; - b=b->next; + ob = &b->next; + b=b->next; - do + do + { + if ( b->pfn == gpfn ) { - if ( b->pfn == gpfn ) - { - b->pfn = 0; - b->spfn_and_flags = 0; + b->pfn = 0; + b->spfn_and_flags = 0; - // b is in the list - *ob=b->next; - b->next = m->shadow_ht_free; - m->shadow_ht_free = b; + // b is in the list + *ob=b->next; + b->next = m->shadow_ht_free; + m->shadow_ht_free = b; #if SHADOW_HASH_DEBUG - if( __shadow_status(m,gpfn) ) BUG(); + if( __shadow_status(m,gpfn) ) BUG(); #endif - shadow_audit(m,0); - return; - } - - ob = &b->next; - b=b->next; + shadow_audit(m,0); + return; } - while (b); - // if we got here, it wasn't in the list + ob = &b->next; + b=b->next; + } + while (b); + + // if we got here, it wasn't in the list BUG(); } static inline void set_shadow_status( struct mm_struct *m, - unsigned int gpfn, unsigned long s ) + unsigned int gpfn, unsigned long s ) { - struct shadow_status *b, *B, *extra, **fptr; + struct shadow_status *b, *B, *extra, **fptr; int i; - B = b = hash_bucket( m, gpfn ); + ASSERT(spin_is_locked(&m->shadow_lock)); + + B = b = hash_bucket( m, gpfn ); ASSERT(gpfn); //ASSERT(s); @@ -458,106 +476,107 @@ static inline void set_shadow_status( struct mm_struct *m, shadow_audit(m,0); - do + do + { + if ( b->pfn == gpfn ) { - if ( b->pfn == gpfn ) - { - b->spfn_and_flags = s; - shadow_audit(m,0); - return; - } - - b=b->next; + b->spfn_and_flags = s; + shadow_audit(m,0); + return; } - while (b); - // if we got here, this is an insert rather than update + b=b->next; + } + while (b); + + // if we got here, this is an insert rather than update ASSERT( s ); // deletes must have succeeded by here if ( B->pfn == 0 ) - { - // we can use this head - ASSERT( 
B->next == 0 ); - B->pfn = gpfn; - B->spfn_and_flags = s; - shadow_audit(m,0); - return; - } + { + // we can use this head + ASSERT( B->next == 0 ); + B->pfn = gpfn; + B->spfn_and_flags = s; + shadow_audit(m,0); + return; + } if( unlikely(m->shadow_ht_free == NULL) ) { - SH_LOG("allocate more shadow hashtable blocks"); + SH_LOG("allocate more shadow hashtable blocks"); - // we need to allocate more space - extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size * - sizeof(struct shadow_status)), GFP_KERNEL ); + // we need to allocate more space + extra = kmalloc( sizeof(void*) + (shadow_ht_extra_size * + sizeof(struct shadow_status)), GFP_KERNEL ); - if( ! extra ) BUG(); // should be more graceful here.... + if( ! extra ) BUG(); // should be more graceful here.... - memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size * - sizeof(struct shadow_status)) ); + memset( extra, 0, sizeof(void*) + (shadow_ht_extra_size * + sizeof(struct shadow_status)) ); - m->shadow_extras_count++; + m->shadow_extras_count++; - // add extras to free list - fptr = &m->shadow_ht_free; - for ( i=0; i<shadow_ht_extra_size; i++ ) - { - *fptr = &extra[i]; - fptr = &(extra[i].next); - } - *fptr = NULL; + // add extras to free list + fptr = &m->shadow_ht_free; + for ( i=0; i<shadow_ht_extra_size; i++ ) + { + *fptr = &extra[i]; + fptr = &(extra[i].next); + } + *fptr = NULL; - *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) = - m->shadow_ht_extras; - m->shadow_ht_extras = extra; + *((struct shadow_status ** ) &extra[shadow_ht_extra_size]) = + m->shadow_ht_extras; + m->shadow_ht_extras = extra; } - // should really put this in B to go right to front - b = m->shadow_ht_free; + // should really put this in B to go right to front + b = m->shadow_ht_free; m->shadow_ht_free = b->next; b->spfn_and_flags = s; - b->pfn = gpfn; - b->next = B->next; - B->next = b; + b->pfn = gpfn; + b->next = B->next; + B->next = b; - shadow_audit(m,0); + shadow_audit(m,0); - return; + return; } static inline void __shadow_mk_pagetable( struct mm_struct *mm ) { - unsigned long gpfn, spfn=0; + unsigned long gpfn, spfn=0; - gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT; + gpfn = pagetable_val(mm->pagetable) >> PAGE_SHIFT; - if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) ) - { - spfn = shadow_l2_table(mm, gpfn ); - } - mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT); + if ( unlikely((spfn=__shadow_status(mm, gpfn)) == 0 ) ) + { + spfn = shadow_l2_table(mm, gpfn ); + } + mm->shadow_table = mk_pagetable(spfn<<PAGE_SHIFT); } static inline void shadow_mk_pagetable( struct mm_struct *mm ) { - SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )", - pagetable_val(mm->pagetable), mm->shadow_mode ); + SH_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )", + pagetable_val(mm->pagetable), mm->shadow_mode ); - if ( unlikely(mm->shadow_mode) ) - { + if ( unlikely(mm->shadow_mode) ) + { + ASSERT(local_irq_is_enabled()); spin_lock(&mm->shadow_lock); - __shadow_mk_pagetable( mm ); + __shadow_mk_pagetable( mm ); spin_unlock(&mm->shadow_lock); - } + } - SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx", - pagetable_val(mm->pagetable), mm->shadow_mode, - pagetable_val(mm->shadow_table) ); + SH_VVLOG("leaving shadow_mk_pagetable( gptbase=%08lx, mode=%d ) sh=%08lx", + pagetable_val(mm->pagetable), mm->shadow_mode, + pagetable_val(mm->shadow_table) ); } @@ -570,3 +589,5 @@ extern int check_pagetable(struct mm_struct *m, pagetable_t pt, char *s); #endif /* XEN_SHADOW_H */ + + diff --git a/xen/net/dev.c b/xen/net/dev.c 
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 0252568131..909e586b53 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -2267,7 +2267,19 @@ long flush_bufs_for_vif(net_vif_t *vif)
 
             /* if in shadow mode, mark the PTE as dirty */
             if( p->mm.shadow_mode == SHM_logdirty )
+            {
                 mark_dirty( &p->mm, rx->pte_ptr>>PAGE_SHIFT );
+#if 0
+                mark_dirty( &p->mm, rx->buf_pfn ); // XXXXXXX debug
+
+                {
+                    unsigned long * p = map_domain_mem( rx->buf_pfn<<PAGE_SHIFT );
+                    p[2] = 0xdeadc001;
+                    unmap_domain_mem(p);
+                }
+#endif
+
+            }
 
             /* assume the shadow page table is about to be blown away,
                and that its not worth marking the buffer as dirty */
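Reviewer's note: incidental, but visible in the re-indented __shadow_status() above -- the shadow hash chains are self-organizing: a hit is swapped into the bucket head ("swap with head" / "pull to front") so hot page tables are found on the first probe. A standalone sketch of move-to-front lookup on a singly linked chain; the real code swaps payloads with the array-embedded bucket head rather than relinking nodes, but the effect is the same:

/* mtf_lookup.c -- standalone sketch of the move-to-front chain lookup. */
#include <stddef.h>

struct node {
    unsigned int  key;
    unsigned long val;
    struct node  *next;
};

/* Find key in the chain anchored at *head.  On a hit anywhere but the
 * front, unlink the node and relink it at the head so the next lookup
 * is O(1).  Returns 0 when absent (vals are assumed non-zero). */
static unsigned long lookup_mtf(struct node **head, unsigned int key)
{
    struct node **prev = head, *n = *head;

    while ( n != NULL )
    {
        if ( n->key == key )
        {
            if ( n != *head )       /* pull to front */
            {
                *prev   = n->next;
                n->next = *head;
                *head   = n;
            }
            return n->val;
        }
        prev = &n->next;
        n = n->next;
    }
    return 0;
}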