author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2004-04-14 16:15:47 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2004-04-14 16:15:47 +0000
commit     0a4286ffcd388d268719a7b05cdf4276ff546169 (patch)
tree       5daef9acf12b8dc464371ac81e353c19960296d7
parent     667d6bf256f7a99223118e58c8bfee4703688b8f (diff)
bitkeeper revision 1.864 (407d63b3Kv7jrCnhzfWFt1VQd3vpFQ)
More TLB-flush fixes.
-rw-r--r--  xen/arch/i386/flushtlb.c           |  20
-rw-r--r--  xen/arch/i386/smp.c                |  16
-rw-r--r--  xen/common/memory.c                |  18
-rw-r--r--  xen/common/shadow.c                | 529
-rw-r--r--  xen/include/asm-i386/flushtlb.h    |  36
-rw-r--r--  xen/include/asm-x86_64/flushtlb.h  |  36
-rw-r--r--  xen/include/xen/mm.h               |  24
-rw-r--r--  xen/net/dev.c                      |  12
8 files changed, 352 insertions, 339 deletions
diff --git a/xen/arch/i386/flushtlb.c b/xen/arch/i386/flushtlb.c
index f247ccd4e8..9180454e4e 100644
--- a/xen/arch/i386/flushtlb.c
+++ b/xen/arch/i386/flushtlb.c
@@ -11,21 +11,18 @@
#include <xen/sched.h>
#include <asm/flushtlb.h>
-unsigned long tlbflush_mask;
-unsigned long tlbflush_clock;
-unsigned long tlbflush_time[NR_CPUS];
+u32 tlbflush_clock;
+u32 tlbflush_time[NR_CPUS];
static inline void tlb_clocktick(unsigned int cpu)
{
- unsigned long x, nx, y, ny;
-
- clear_bit(cpu, &tlbflush_mask);
+ u32 y, ny;
/* Tick the clock. 'y' contains the current time after the tick. */
ny = tlbflush_clock;
do {
#ifdef CONFIG_SMP
- if ( unlikely(((y = ny+1) & (GLOBAL_FLUSH_PERIOD - 1)) == 0) )
+ if ( unlikely(((y = ny+1) & TLBCLOCK_EPOCH_MASK) == 0) )
{
new_tlbflush_clock_period();
y = tlbflush_clock;
@@ -37,13 +34,8 @@ static inline void tlb_clocktick(unsigned int cpu)
}
while ( unlikely((ny = cmpxchg(&tlbflush_clock, y-1, y)) != y-1) );
- /* Update cpu's timestamp to current time, unless someone else beats us. */
- nx = tlbflush_time[cpu];
- do {
- if ( unlikely((x = nx) >= y) )
- break;
- }
- while ( unlikely((nx = cmpxchg(&tlbflush_time[cpu], x, y)) != x) );
+ /* Update cpu's timestamp to new time. */
+ tlbflush_time[cpu] = y;
}
void write_cr3_counted(unsigned long pa)
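
To see why the plain store to tlbflush_time[cpu] is enough, the stand-alone model below mirrors the simplified tlb_clocktick() above: the shared clock is still advanced with a compare-and-swap, but each CPU's own timestamp slot is written only by that CPU. This is a user-space sketch only (GCC's __sync_val_compare_and_swap stands in for the kernel's cmpxchg(), and the epoch-boundary path is omitted); it is not the hypervisor code itself.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;
#define NR_CPUS 4

static u32 tlbflush_clock;
static u32 tlbflush_time[NR_CPUS];

static void model_tlb_clocktick(unsigned int cpu)
{
    u32 y, ny;

    /* Tick the shared clock; 'y' is the time after our tick. */
    ny = tlbflush_clock;
    do {
        y = ny + 1;
        /* Real code: if ((y & TLBCLOCK_EPOCH_MASK) == 0) start a new period. */
    } while ((ny = __sync_val_compare_and_swap(&tlbflush_clock, y - 1, y)) != y - 1);

    /* Only this CPU ever writes its own slot, so no cmpxchg is needed. */
    tlbflush_time[cpu] = y;
}

int main(void)
{
    model_tlb_clocktick(0);
    model_tlb_clocktick(1);
    model_tlb_clocktick(0);
    printf("clock=%u cpu0=%u cpu1=%u\n",
           tlbflush_clock, tlbflush_time[0], tlbflush_time[1]);
    return 0;   /* prints: clock=3 cpu0=3 cpu1=2 */
}
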
diff --git a/xen/arch/i386/smp.c b/xen/arch/i386/smp.c
index 0fe283ce09..5ed43d5551 100644
--- a/xen/arch/i386/smp.c
+++ b/xen/arch/i386/smp.c
@@ -284,10 +284,15 @@ void new_tlbflush_clock_period(void)
if ( unlikely(!spin_trylock(&synchronous_ipi_lock)) )
return;
- flush_cpumask = tlbflush_mask & ~(1 << smp_processor_id());
- if ( unlikely(flush_cpumask != 0) )
+ /* Someone may acquire the lock and execute the flush before us. */
+ if ( ((tlbflush_clock+1) & TLBCLOCK_EPOCH_MASK) != 0 )
+ goto out;
+
+ if ( smp_num_cpus > 1 )
{
- send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
+ /* Flush everyone else. We definitely flushed just before entry. */
+ flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id());
+ send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
while ( flush_cpumask != 0 )
{
rep_nop();
@@ -295,11 +300,10 @@ void new_tlbflush_clock_period(void)
}
}
- /* No need for cmpxchg updates here: we are protected by tlbstate lock. */
- tlbflush_mask = (1 << smp_num_cpus) - 1;
- wmb(); /* Reset the mask before allowing the clock to continue ticking. */
+ /* No need for atomicity: we are the only possible updater. */
tlbflush_clock++;
+ out:
spin_unlock(&synchronous_ipi_lock);
}
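
The early-exit test works because tlb_clocktick() only calls new_tlbflush_clock_period() while the clock is parked one tick below an epoch boundary; as soon as the winner bumps the clock past the boundary, every CPU that raced in behind it sees a non-zero result and drops out. A tiny stand-alone check, with illustrative values only:

#include <stdio.h>

#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)

int main(void)
{
    unsigned int parked = 0x0000ffff; /* clock one tick below the epoch boundary */
    unsigned int bumped = 0x00010000; /* the winner has already incremented it */

    /* 0 => we are first in, so carry out the global flush. */
    printf("%d\n", ((parked + 1) & TLBCLOCK_EPOCH_MASK) != 0);
    /* 1 => someone else already finished the period change: goto out. */
    printf("%d\n", ((bumped + 1) & TLBCLOCK_EPOCH_MASK) != 0);
    return 0;
}
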
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 1be614b28e..4b13f84fb3 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -153,7 +153,7 @@ static int alloc_l2_table(struct pfn_info *page);
static int alloc_l1_table(struct pfn_info *page);
static int get_page_from_pagenr(unsigned long page_nr, int check_level);
static int get_page_and_type_from_pagenr(unsigned long page_nr,
- unsigned int type,
+ u32 type,
int check_level);
#define CHECK_STRICT 0 /* Subject domain must own the page */
#define CHECK_ANYDOM 1 /* Any domain may own the page (if subject is priv.) */
@@ -299,7 +299,7 @@ static int get_page_from_pagenr(unsigned long page_nr, int check_level)
{
struct task_struct *p = current;
struct pfn_info *page = &frame_table[page_nr];
- unsigned long y, x, nx;
+ u32 y, x, nx;
if ( unlikely(!pfn_is_ram(page_nr)) )
{
@@ -345,7 +345,7 @@ static int get_page_from_pagenr(unsigned long page_nr, int check_level)
static int get_page_and_type_from_pagenr(unsigned long page_nr,
- unsigned int type,
+ u32 type,
int check_level)
{
struct pfn_info *page = &frame_table[page_nr];
@@ -355,7 +355,7 @@ static int get_page_and_type_from_pagenr(unsigned long page_nr,
if ( unlikely(!get_page_type(page, type)) )
{
- MEM_LOG("Bad page type for pfn %08lx (%08lx)",
+ MEM_LOG("Bad page type for pfn %08lx (%08x)",
page_nr, page->type_and_flags);
put_page(page);
return 0;
@@ -379,7 +379,7 @@ static int get_page_and_type_from_pagenr(unsigned long page_nr,
*/
static int get_linear_pagetable(l2_pgentry_t l2e, unsigned long pfn)
{
- unsigned long x, y;
+ u32 x, y;
struct pfn_info *page;
if ( (l2_pgentry_val(l2e) & _PAGE_RW) )
@@ -1207,7 +1207,7 @@ void __audit_page(unsigned long pfn) {
page = &frame_table[pfn];
page_addr = pfn << PAGE_SHIFT;
- printk("audit page: pfn=%lx info: cf=%lx tf=%lx ts=%lx dom=%lx\n", pfn,
+ printk("audit page: pfn=%lx info: cf=%x tf=%x ts=%x dom=%lx\n", pfn,
page->count_and_flags, page->type_and_flags,
page->tlbflush_timestamp, (unsigned long)page->u.domain);
@@ -1234,7 +1234,7 @@ void __audit_page(unsigned long pfn) {
continue;
if ( l1_pgentry_to_pagenr(l1e) == pfn )
{
- printk(" pte_pfn=%06lx cf=%08lx tf=%08lx dom=%08lx\n",
+ printk(" pte_pfn=%06lx cf=%08x tf=%08x dom=%08lx\n",
i, frame_table[i].count_and_flags,
frame_table[i].type_and_flags,
(unsigned long)frame_table[i].u.domain);
@@ -1311,7 +1311,7 @@ void audit_all_pages(u_char key, void *dev_id, struct pt_regs *regs)
if ( ((frame_table[i].count_and_flags & PGC_count_mask) != 0) &&
((frame_table[i].count_and_flags & PGC_zombie) != 0) )
{
- printk("zombie: pfn=%08lx cf=%08lx tf=%08lx dom=%08lx\n",
+ printk("zombie: pfn=%08lx cf=%08x tf=%08x dom=%08lx\n",
i, frame_table[i].count_and_flags,
frame_table[i].type_and_flags,
(unsigned long)frame_table[i].u.domain);
@@ -1356,7 +1356,7 @@ void audit_all_pages(u_char key, void *dev_id, struct pt_regs *regs)
if ( (frame_table[i].count_and_flags & PGC_count_mask)
!= ref_count )
{
- printk("refcount error: pfn=%06lx cf=%08lx refcount=%lx\n",
+ printk("refcount error: pfn=%06lx cf=%08x refcount=%lx\n",
i, frame_table[i].count_and_flags, ref_count);
__audit_page(i);
printk("\n");
diff --git a/xen/common/shadow.c b/xen/common/shadow.c
index 14d395cbf7..1144c0e65e 100644
--- a/xen/common/shadow.c
+++ b/xen/common/shadow.c
@@ -27,20 +27,20 @@ hypercall lock anyhow (at least initially).
********/
static inline void free_shadow_page( struct mm_struct *m,
- struct pfn_info *pfn_info )
+ struct pfn_info *pfn_info )
{
- unsigned long flags;
+ unsigned long flags;
unsigned long type = pfn_info->type_and_flags & PGT_type_mask;
m->shadow_page_count--;
if (type == PGT_l1_page_table)
- perfc_decr(shadow_l1_pages);
+ perfc_decr(shadow_l1_pages);
else if (type == PGT_l2_page_table)
- perfc_decr(shadow_l2_pages);
- else printk("Free shadow weird page type pfn=%08x type=%08lx\n",
- frame_table-pfn_info, pfn_info->type_and_flags);
-
+ perfc_decr(shadow_l2_pages);
+ else printk("Free shadow weird page type pfn=%08x type=%08x\n",
+ frame_table-pfn_info, pfn_info->type_and_flags);
+
pfn_info->type_and_flags = 0;
spin_lock_irqsave(&free_list_lock, flags);
@@ -53,7 +53,7 @@ static void __free_shadow_table( struct mm_struct *m )
{
int j, free=0;
struct shadow_status *a,*next;
-
+
// the code assumes you're not using the page tables i.e.
// the domain is stopped and cr3 is something else!!
@@ -62,39 +62,39 @@ static void __free_shadow_table( struct mm_struct *m )
shadow_audit(m,1);
for(j=0;j<shadow_ht_buckets;j++)
- {
- a = &m->shadow_ht[j];
- if (a->pfn)
- {
- free_shadow_page( m,
- &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
- a->pfn = 0;
- a->spfn_and_flags = 0;
- free++;
+ {
+ a = &m->shadow_ht[j];
+ if (a->pfn)
+ {
+ free_shadow_page( m,
+ &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
+ a->pfn = 0;
+ a->spfn_and_flags = 0;
+ free++;
+ }
+ next=a->next;
+ a->next=NULL;
+ a=next;
+ while(a)
+ {
+ struct shadow_status *next = a->next;
+
+ free_shadow_page( m,
+ &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
+ a->pfn = 0;
+ a->spfn_and_flags = 0;
+ free++;
+ a->next = m->shadow_ht_free;
+ m->shadow_ht_free = a;
+ a=next;
}
- next=a->next;
- a->next=NULL;
- a=next;
- while(a)
- {
- struct shadow_status *next = a->next;
-
- free_shadow_page( m,
- &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
- a->pfn = 0;
- a->spfn_and_flags = 0;
- free++;
- a->next = m->shadow_ht_free;
- m->shadow_ht_free = a;
- a=next;
- }
- shadow_audit(m,0);
+ shadow_audit(m,0);
}
SH_LOG("Free shadow table. Freed= %d",free);
}
static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
- struct pfn_info *spfn_info )
+ struct pfn_info *spfn_info )
{
int work = 0;
unsigned int spfn = spfn_info-frame_table;
@@ -103,22 +103,22 @@ static inline int shadow_page_op( struct mm_struct *m, unsigned int op,
{
case DOM0_SHADOW_CONTROL_OP_CLEAN:
{
- int i;
- if ( (spfn_info->type_and_flags & PGT_type_mask) ==
- PGT_l1_page_table )
- {
- unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
-
- for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
- {
- if ( spl1e[i] & _PAGE_RW )
- {
- work++;
- spl1e[i] &= ~_PAGE_RW;
- }
- }
- unmap_domain_mem( spl1e );
- }
+ int i;
+ if ( (spfn_info->type_and_flags & PGT_type_mask) ==
+ PGT_l1_page_table )
+ {
+ unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
+
+ for (i=0;i<ENTRIES_PER_L1_PAGETABLE;i++)
+ {
+ if ( spl1e[i] & _PAGE_RW )
+ {
+ work++;
+ spl1e[i] &= ~_PAGE_RW;
+ }
+ }
+ unmap_domain_mem( spl1e );
+ }
}
}
return work;
@@ -127,7 +127,7 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
{
int j, work=0;
struct shadow_status *a;
-
+
// the code assumes you're not using the page tables i.e.
// the domain is stopped and cr3 is something else!!
@@ -137,18 +137,18 @@ static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
for(j=0;j<shadow_ht_buckets;j++)
{
- a = &m->shadow_ht[j];
- if (a->pfn)
+ a = &m->shadow_ht[j];
+ if (a->pfn)
{
- work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
+ work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
}
- a=a->next;
- while(a)
- {
- work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
- a=a->next;
- }
- shadow_audit(m,0);
+ a=a->next;
+ while(a)
+ {
+ work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] );
+ a=a->next;
+ }
+ shadow_audit(m,0);
}
SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
}
@@ -165,35 +165,35 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
spin_lock(&m->shadow_lock);
m->shadow_mode = mode;
-
+
// allocate hashtable
m->shadow_ht = kmalloc( shadow_ht_buckets *
- sizeof(struct shadow_status), GFP_KERNEL );
+ sizeof(struct shadow_status), GFP_KERNEL );
if( ! m->shadow_ht )
- goto nomem;
+ goto nomem;
memset( m->shadow_ht, 0, shadow_ht_buckets *
- sizeof(struct shadow_status) );
+ sizeof(struct shadow_status) );
// allocate space for first lot of extra nodes
m->shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)), GFP_KERNEL );
+ sizeof(struct shadow_status)), GFP_KERNEL );
if( ! m->shadow_ht_extras )
- goto nomem;
+ goto nomem;
memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size *
- sizeof(struct shadow_status)) );
+ sizeof(struct shadow_status)) );
m->shadow_extras_count++;
-
+
// add extras to free list
fptr = &m->shadow_ht_free;
for ( i=0; i<shadow_ht_extra_size; i++ )
{
- *fptr = &m->shadow_ht_extras[i];
- fptr = &(m->shadow_ht_extras[i].next);
+ *fptr = &m->shadow_ht_extras[i];
+ fptr = &(m->shadow_ht_extras[i].next);
}
*fptr = NULL;
*((struct shadow_status ** )
@@ -201,16 +201,16 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
if ( mode == SHM_logdirty )
{
- m->shadow_dirty_bitmap_size = (p->max_pages+63)&(~63);
- m->shadow_dirty_bitmap =
- kmalloc( m->shadow_dirty_bitmap_size/8, GFP_KERNEL );
-
- if( !m->shadow_dirty_bitmap )
- {
- m->shadow_dirty_bitmap_size = 0;
- goto nomem;
- }
- memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8);
+ m->shadow_dirty_bitmap_size = (p->max_pages+63)&(~63);
+ m->shadow_dirty_bitmap =
+ kmalloc( m->shadow_dirty_bitmap_size/8, GFP_KERNEL );
+
+ if( !m->shadow_dirty_bitmap )
+ {
+ m->shadow_dirty_bitmap_size = 0;
+ goto nomem;
+ }
+ memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8);
}
spin_unlock(&m->shadow_lock);
@@ -220,7 +220,7 @@ int shadow_mode_enable( struct task_struct *p, unsigned int mode )
return 0;
-nomem:
+ nomem:
spin_unlock(&m->shadow_lock);
return -ENOMEM;
}
@@ -236,24 +236,24 @@ void shadow_mode_disable( struct task_struct *p )
spin_unlock(&m->shadow_lock);
SH_LOG("freed tables count=%d l1=%d l2=%d",
- m->shadow_page_count, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
+ m->shadow_page_count, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
next = m->shadow_ht_extras;
while( next )
{
- struct shadow_status * this = next;
- m->shadow_extras_count--;
- next = *((struct shadow_status **)(&next[shadow_ht_extra_size]));
- kfree( this );
+ struct shadow_status * this = next;
+ m->shadow_extras_count--;
+ next = *((struct shadow_status **)(&next[shadow_ht_extra_size]));
+ kfree( this );
}
SH_LOG("freed extras, now %d", m->shadow_extras_count);
if( m->shadow_dirty_bitmap )
{
- kfree( m->shadow_dirty_bitmap );
- m->shadow_dirty_bitmap = 0;
- m->shadow_dirty_bitmap_size = 0;
+ kfree( m->shadow_dirty_bitmap );
+ m->shadow_dirty_bitmap = 0;
+ m->shadow_dirty_bitmap_size = 0;
}
// free the hashtable itself
@@ -270,8 +270,8 @@ static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
if ( m == &current->mm )
{
- printk("Don't try and flush your own page tables!\n");
- return;
+ printk("Don't try and flush your own page tables!\n");
+ return;
}
@@ -284,14 +284,14 @@ static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
switch(op)
{
case DOM0_SHADOW_CONTROL_OP_FLUSH:
- __free_shadow_table( m );
- break;
+ __free_shadow_table( m );
+ break;
case DOM0_SHADOW_CONTROL_OP_CLEAN:
- __scan_shadow_table( m, op );
- // we used to bzero dirty bitmap here, but now leave this to user space
- // if we were double buffering we'd do the flip here
- break;
+ __scan_shadow_table( m, op );
+ // we used to bzero dirty bitmap here, but now leave this to user space
+ // if we were double buffering we'd do the flip here
+ break;
}
spin_unlock(&m->shadow_lock);
@@ -315,29 +315,29 @@ int shadow_mode_control( struct task_struct *p, unsigned int op )
// synchronously stop domain
if( 0 && !(p->state & TASK_STOPPED) && !(p->state & TASK_PAUSED))
{
- printk("about to pause domain\n");
- sched_pause_sync(p);
- printk("paused domain\n");
- we_paused = 1;
+ printk("about to pause domain\n");
+ sched_pause_sync(p);
+ printk("paused domain\n");
+ we_paused = 1;
}
if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF )
{
- shadow_mode_disable(p);
+ shadow_mode_disable(p);
}
else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
{
- if(p->mm.shadow_mode) shadow_mode_disable(p);
- shadow_mode_enable(p, SHM_test);
- }
+ if(p->mm.shadow_mode) shadow_mode_disable(p);
+ shadow_mode_enable(p, SHM_test);
+ }
else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN )
{
- shadow_mode_table_op(p, op);
+ shadow_mode_table_op(p, op);
}
else
{
- if ( we_paused ) wake_up(p);
- return -EINVAL;
+ if ( we_paused ) wake_up(p);
+ return -EINVAL;
}
if ( we_paused ) wake_up(p);
@@ -359,8 +359,8 @@ void unshadow_table( unsigned long gpfn, unsigned int type )
unsigned long spfn;
SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
- type,
- gpfn );
+ type,
+ gpfn );
perfc_incrc(unshadow_table_count);
@@ -404,21 +404,21 @@ unsigned long shadow_l2_table(
// mark pfn as being shadowed, update field to point at shadow
set_shadow_status(m, gpfn, spfn | PSH_shadowed);
-
+
// we need to do this before the linear map is set up
spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
// get hypervisor and 2x linear PT mapings installed
memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+ &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+ mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) |
- __PAGE_HYPERVISOR);
+ mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) |
+ __PAGE_HYPERVISOR);
// can't use the linear map as we may not be in the right PT
gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
@@ -426,24 +426,24 @@ unsigned long shadow_l2_table(
// proactively create entries for pages that are already shadowed
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
{
- unsigned long spte = 0;
+ unsigned long spte = 0;
#if 0 // Turns out this doesn't really help
- unsigned long gpte;
+ unsigned long gpte;
- gpte = l2_pgentry_val(gpl2e[i]);
+ gpte = l2_pgentry_val(gpl2e[i]);
- if (gpte & _PAGE_PRESENT)
- {
- unsigned long s_sh =
- __shadow_status(p, gpte>>PAGE_SHIFT);
+ if (gpte & _PAGE_PRESENT)
+ {
+ unsigned long s_sh =
+ __shadow_status(p, gpte>>PAGE_SHIFT);
- l2pde_general( m, &gpte, &spte, s_sh );
+ l2pde_general( m, &gpte, &spte, s_sh );
- }
+ }
#endif
- spl2e[i] = mk_l2_pgentry( spte );
+ spl2e[i] = mk_l2_pgentry( spte );
}
@@ -470,20 +470,20 @@ int shadow_fault( unsigned long va, long error_code )
if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
{
- SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
- return 0; // propagate to guest
+ SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+ return 0; // propagate to guest
}
if ( ! (gpte & _PAGE_PRESENT) )
{
- SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
- return 0; // we're not going to be able to help
+ SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+ return 0; // we're not going to be able to help
}
if ( (error_code & 2) && ! (gpte & _PAGE_RW) )
{
- // write fault on RO page
- return 0;
+ // write fault on RO page
+ return 0;
}
spin_lock(&current->mm.shadow_lock);
@@ -491,35 +491,35 @@ int shadow_fault( unsigned long va, long error_code )
if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
{
- SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
- spin_unlock(&m->shadow_lock);
- return 0; // propagate to guest
+ SH_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+ spin_unlock(&m->shadow_lock);
+ return 0; // propagate to guest
}
if ( unlikely(!(gpte & _PAGE_PRESENT)) )
{
- SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
- spin_unlock(&m->shadow_lock);
- return 0; // we're not going to be able to help
+ SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+ spin_unlock(&m->shadow_lock);
+ return 0; // we're not going to be able to help
}
if ( error_code & 2 )
{ // write fault
- if ( likely(gpte & _PAGE_RW) )
- {
- l1pte_write_fault( m, &gpte, &spte );
- }
- else
- { // write fault on RO page
- SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
- spin_unlock(&m->shadow_lock);
- return 0; // propagate to guest
- // not clear whether we should set accessed bit here...
- }
+ if ( likely(gpte & _PAGE_RW) )
+ {
+ l1pte_write_fault( m, &gpte, &spte );
+ }
+ else
+ { // write fault on RO page
+ SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+ spin_unlock(&m->shadow_lock);
+ return 0; // propagate to guest
+ // not clear whether we should set accessed bit here...
+ }
}
else
{
- l1pte_read_fault( m, &gpte, &spte );
+ l1pte_read_fault( m, &gpte, &spte );
}
SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte );
@@ -527,77 +527,77 @@ int shadow_fault( unsigned long va, long error_code )
// write back updated gpte
// XXX watch out for read-only L2 entries! (not used in Linux)
if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
- BUG(); // fixme!
+ BUG(); // fixme!
if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
{
- // failed:
- // the L1 may not be shadowed, or the L2 entry may be insufficient
+ // failed:
+ // the L1 may not be shadowed, or the L2 entry may be insufficient
- unsigned long gpde, spde, gl1pfn, sl1pfn;
+ unsigned long gpde, spde, gl1pfn, sl1pfn;
- SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte );
+ SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte );
- gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
+ gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
- gl1pfn = gpde>>PAGE_SHIFT;
+ gl1pfn = gpde>>PAGE_SHIFT;
- if ( ! (sl1pfn=__shadow_status(&current->mm, gl1pfn) ) )
+ if ( ! (sl1pfn=__shadow_status(&current->mm, gl1pfn) ) )
{
- // this L1 is NOT already shadowed so we need to shadow it
- struct pfn_info *sl1pfn_info;
- unsigned long *gpl1e, *spl1e;
- int i;
- sl1pfn_info = alloc_shadow_page( &current->mm );
- sl1pfn_info->type_and_flags = PGT_l1_page_table;
+ // this L1 is NOT already shadowed so we need to shadow it
+ struct pfn_info *sl1pfn_info;
+ unsigned long *gpl1e, *spl1e;
+ int i;
+ sl1pfn_info = alloc_shadow_page( &current->mm );
+ sl1pfn_info->type_and_flags = PGT_l1_page_table;
- sl1pfn = sl1pfn_info - frame_table;
+ sl1pfn = sl1pfn_info - frame_table;
- SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
- perfc_incrc(shadow_l1_table_count);
- perfc_incr(shadow_l1_pages);
+ SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
+ perfc_incrc(shadow_l1_table_count);
+ perfc_incr(shadow_l1_pages);
- set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);
+ set_shadow_status(&current->mm, gl1pfn, PSH_shadowed | sl1pfn);
- l2pde_general( m, &gpde, &spde, sl1pfn );
+ l2pde_general( m, &gpde, &spde, sl1pfn );
- linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
- shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
+ linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+ shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
- gpl1e = (unsigned long *) &(linear_pg_table[
- (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
+ gpl1e = (unsigned long *) &(linear_pg_table[
+ (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
- spl1e = (unsigned long *) &shadow_linear_pg_table[
- (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
+ spl1e = (unsigned long *) &shadow_linear_pg_table[
+ (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
- for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
- {
- l1pte_no_fault( m, &gpl1e[i], &spl1e[i] );
+ for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+ {
+ l1pte_no_fault( m, &gpl1e[i], &spl1e[i] );
}
}
- else
+ else
{
- // this L1 was shadowed (by another PT) but we didn't have an L2
- // entry for it
+ // this L1 was shadowed (by another PT) but we didn't have an L2
+ // entry for it
- SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
+ SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
- l2pde_general( m, &gpde, &spde, sl1pfn );
+ l2pde_general( m, &gpde, &spde, sl1pfn );
- linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
- shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
-
+ linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+ shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
+
}
- shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
- // (we need to do the above even if we've just made the shadow L1)
+ shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
+ // (we need to do the above even if we've just made the shadow L1)
} // end of fixup writing the shadow L1 directly failed
-
+
perfc_incrc(shadow_fixup_count);
check_pagetable( current, current->mm.pagetable, "post-sf" );
@@ -611,14 +611,14 @@ int shadow_fault( unsigned long va, long error_code )
void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
unsigned long *prev_spfn_ptr,
- l1_pgentry_t **prev_spl1e_ptr )
+ l1_pgentry_t **prev_spl1e_ptr )
{
unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;
l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n",
- pa,gpte,prev_spfn, prev_spl1e);
+ pa,gpte,prev_spfn, prev_spl1e);
// to get here, we know the l1 page *must* be shadowed
@@ -627,14 +627,14 @@ void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
if ( spfn == prev_spfn )
{
- spl1e = prev_spl1e;
+ spl1e = prev_spl1e;
}
else
{
- if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
- spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
- *prev_spfn_ptr = spfn;
- *prev_spl1e_ptr = spl1e;
+ if( prev_spl1e ) unmap_domain_mem( prev_spl1e );
+ spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+ *prev_spfn_ptr = spfn;
+ *prev_spl1e_ptr = spl1e;
}
// XXX we assume only pagetables can be shadowed;
@@ -664,17 +664,17 @@ void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
spte = 0;
if( gpte & _PAGE_PRESENT )
- s_sh = __shadow_status(&current->mm, gpte >> PAGE_SHIFT);
+ s_sh = __shadow_status(&current->mm, gpte >> PAGE_SHIFT);
sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
// no real need for a cache here
- l2pde_general( &current->mm, &gpte, &spte, s_sh );
+ l2pde_general( &current->mm, &gpte, &spte, s_sh );
// XXXX Should mark guest pte as DIRTY and ACCESSED too!!!!!
sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t) ] =
- mk_l2_pgentry( spte );
+ mk_l2_pgentry( spte );
unmap_domain_mem( (void *) sp2le );
}
@@ -690,57 +690,57 @@ char * sh_check_name;
{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n", sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
static int check_pte( struct mm_struct *m,
- unsigned long gpte, unsigned long spte, int level, int i )
+ unsigned long gpte, unsigned long spte, int level, int i )
{
unsigned long mask, gpfn, spfn;
if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
- return 1; // always safe
+ return 1; // always safe
if ( !(spte & _PAGE_PRESENT) )
- FAIL("Non zero not present spte");
+ FAIL("Non zero not present spte");
if( level == 2 ) sh_l2_present++;
if( level == 1 ) sh_l1_present++;
if ( !(gpte & _PAGE_PRESENT) )
- FAIL("Guest not present yet shadow is");
+ FAIL("Guest not present yet shadow is");
mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
if ( (spte & mask) != (gpte & mask ) )
- FAIL("Corrupt?");
+ FAIL("Corrupt?");
if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
- FAIL("Dirty coherence");
+ FAIL("Dirty coherence");
if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
- FAIL("Accessed coherence");
+ FAIL("Accessed coherence");
if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
- FAIL("RW coherence");
+ FAIL("RW coherence");
if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
- FAIL("RW2 coherence");
-
+ FAIL("RW2 coherence");
+
spfn = spte>>PAGE_SHIFT;
gpfn = gpte>>PAGE_SHIFT;
if ( gpfn == spfn )
{
- if ( level > 1 )
- FAIL("Linear map ???"); // XXX this will fail on BSD
+ if ( level > 1 )
+ FAIL("Linear map ???"); // XXX this will fail on BSD
- return 1;
+ return 1;
}
else
{
- if ( level < 2 )
- FAIL("Shadow in L1 entry?");
+ if ( level < 2 )
+ FAIL("Shadow in L1 entry?");
- if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
- FAIL("spfn problem g.sf=%08lx",
- __shadow_status(p, gpfn) );
+ if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) )
+ FAIL("spfn problem g.sf=%08lx",
+ __shadow_status(p, gpfn) );
}
return 1;
@@ -748,7 +748,7 @@ static int check_pte( struct mm_struct *m,
static int check_l1_table( struct mm_struct *m, unsigned long va,
- unsigned long g2, unsigned long s2 )
+ unsigned long g2, unsigned long s2 )
{
int j;
unsigned long *gpl1e, *spl1e;
@@ -761,12 +761,12 @@ static int check_l1_table( struct mm_struct *m, unsigned long va,
for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
{
- unsigned long gpte = gpl1e[j];
- unsigned long spte = spl1e[j];
-
- check_pte( p, gpte, spte, 1, j );
+ unsigned long gpte = gpl1e[j];
+ unsigned long spte = spl1e[j];
+
+ check_pte( p, gpte, spte, 1, j );
}
-
+
unmap_domain_mem( spl1e );
unmap_domain_mem( gpl1e );
@@ -793,17 +793,17 @@ int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) )
{
- printk("%s-PT %08lx not shadowed\n", s, gptbase);
+ printk("%s-PT %08lx not shadowed\n", s, gptbase);
- if( __shadow_status(p, gpfn) != 0 ) BUG();
+ if( __shadow_status(p, gpfn) != 0 ) BUG();
- return 0;
+ return 0;
}
-
+
spfn = __shadow_status(p, gpfn) & PSH_pfn_mask;
if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) )
- FAILPT("ptbase shadow inconsistent1");
+ FAILPT("ptbase shadow inconsistent1");
gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT );
spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
@@ -812,55 +812,55 @@ int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
- ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE)
- * sizeof(l2_pgentry_t)) )
+ &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+ ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE)
+ * sizeof(l2_pgentry_t)) )
{
- printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
- for (i=DOMAIN_ENTRIES_PER_L2_PAGETABLE;
- i<(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT));
- i++ )
- printk("+++ (%d) %08lx %08lx\n",i,
- l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) );
- FAILPT("hypervisor entries inconsistent");
+ printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn);
+ for (i=DOMAIN_ENTRIES_PER_L2_PAGETABLE;
+ i<(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT));
+ i++ )
+ printk("+++ (%d) %08lx %08lx\n",i,
+ l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) );
+ FAILPT("hypervisor entries inconsistent");
}
if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
- FAILPT("hypervisor linear map inconsistent");
+ l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) )
+ FAILPT("hypervisor linear map inconsistent");
if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
- FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
- l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]),
- (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR
- );
+ ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) )
+ FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx",
+ l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]),
+ (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR
+ );
if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- ((__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | __PAGE_HYPERVISOR))) )
- FAILPT("hypervisor per-domain map inconsistent");
+ ((__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | __PAGE_HYPERVISOR))) )
+ FAILPT("hypervisor per-domain map inconsistent");
// check the whole L2
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
{
- unsigned long gpte = l2_pgentry_val(gpl2e[i]);
- unsigned long spte = l2_pgentry_val(spl2e[i]);
+ unsigned long gpte = l2_pgentry_val(gpl2e[i]);
+ unsigned long spte = l2_pgentry_val(spl2e[i]);
- check_pte( p, gpte, spte, 2, i );
+ check_pte( p, gpte, spte, 2, i );
}
// go back and recurse
for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
{
- unsigned long gpte = l2_pgentry_val(gpl2e[i]);
- unsigned long spte = l2_pgentry_val(spl2e[i]);
+ unsigned long gpte = l2_pgentry_val(gpl2e[i]);
+ unsigned long spte = l2_pgentry_val(spl2e[i]);
- if ( spte )
- check_l1_table( p,
- i<<L2_PAGETABLE_SHIFT,
- gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
+ if ( spte )
+ check_l1_table( p,
+ i<<L2_PAGETABLE_SHIFT,
+ gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
}
@@ -868,15 +868,10 @@ int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s )
unmap_domain_mem( gpl2e );
SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
- sh_l2_present, sh_l1_present );
-
+ sh_l2_present, sh_l1_present );
+
return 1;
}
#endif
-
-
-
-
-
diff --git a/xen/include/asm-i386/flushtlb.h b/xen/include/asm-i386/flushtlb.h
index 0dd6afc891..4b558eae83 100644
--- a/xen/include/asm-i386/flushtlb.h
+++ b/xen/include/asm-i386/flushtlb.h
@@ -13,23 +13,35 @@
#include <xen/smp.h>
/*
- * Every GLOBAL_FLUSH_PERIOD ticks of the tlbflush clock, every TLB in the
- * system is guaranteed to have been flushed.
+ * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
+ * Therefore, if the current TLB time and a previously-read timestamp differ
+ * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
+ * has wrapped at least once and every CPU's TLB is guaranteed to have been
+ * flushed meanwhile.
+ * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
*/
-#define GLOBAL_FLUSH_PERIOD (1<<16)
+#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
/*
- * '_cpu_stamp' is the current timestamp for the CPU we are testing.
- * '_lastuse_stamp' is a timestamp taken when the PFN we are testing was last
+ * 'cpu_stamp' is the current timestamp for the CPU we are testing.
+ * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last
* used for a purpose that may have caused the CPU's TLB to become tainted.
*/
-#define NEED_FLUSH(_cpu_stamp, _lastuse_stamp) \
- (((_cpu_stamp) <= (_lastuse_stamp)) && \
- (((_lastuse_stamp) - (_cpu_stamp)) <= (2*GLOBAL_FLUSH_PERIOD)))
-
-extern unsigned long tlbflush_mask;
-extern unsigned long tlbflush_clock;
-extern unsigned long tlbflush_time[NR_CPUS];
+static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
+{
+ /*
+ * Why does this work?
+ * 1. XOR sets high-order bits if the stamps are from differing epochs.
+ * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'.
+ * In either case a flush is unnecessary: we therefore OR the results from
+ * (1) and (2), mask the high-order bits, and return the inverse.
+ */
+ return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) &
+ ~TLBCLOCK_EPOCH_MASK);
+}
+
+extern u32 tlbflush_clock;
+extern u32 tlbflush_time[NR_CPUS];
extern void new_tlbflush_clock_period(void);
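
The XOR/subtraction trick in NEED_FLUSH() is easiest to check with concrete stamps. The stand-alone model below copies the constant and the function body from the hunk above; the three test values are illustrative only, and the same reasoning applies to the identical asm-x86_64 header that follows.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;
#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)

static int need_flush(u32 cpu_stamp, u32 lastuse_stamp)
{
    /* Same body as NEED_FLUSH() above. */
    return !(((lastuse_stamp ^ cpu_stamp) | (lastuse_stamp - cpu_stamp)) &
             ~TLBCLOCK_EPOCH_MASK);
}

int main(void)
{
    /* Same epoch, CPU flushed after the page was last used: prints 0. */
    printf("%d\n", need_flush(0x00010005, 0x00010002));
    /* Same epoch, page used after the CPU's last flush: prints 1. */
    printf("%d\n", need_flush(0x00010002, 0x00010005));
    /* Stamps from different epochs (here the clock has wrapped): at least one
     * global flush has happened since the page was last used, so prints 0. */
    printf("%d\n", need_flush(0x00000010, 0xfffffff0));
    return 0;
}
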
diff --git a/xen/include/asm-x86_64/flushtlb.h b/xen/include/asm-x86_64/flushtlb.h
index 0dd6afc891..4b558eae83 100644
--- a/xen/include/asm-x86_64/flushtlb.h
+++ b/xen/include/asm-x86_64/flushtlb.h
@@ -13,23 +13,35 @@
#include <xen/smp.h>
/*
- * Every GLOBAL_FLUSH_PERIOD ticks of the tlbflush clock, every TLB in the
- * system is guaranteed to have been flushed.
+ * Every time the TLB clock passes an "epoch", every CPU's TLB is flushed.
+ * Therefore, if the current TLB time and a previously-read timestamp differ
+ * in their significant bits (i.e., ~TLBCLOCK_EPOCH_MASK), then the TLB clock
+ * has wrapped at least once and every CPU's TLB is guaranteed to have been
+ * flushed meanwhile.
+ * This allows us to deal gracefully with a bounded (a.k.a. wrapping) clock.
*/
-#define GLOBAL_FLUSH_PERIOD (1<<16)
+#define TLBCLOCK_EPOCH_MASK ((1U<<16)-1)
/*
- * '_cpu_stamp' is the current timestamp for the CPU we are testing.
- * '_lastuse_stamp' is a timestamp taken when the PFN we are testing was last
+ * 'cpu_stamp' is the current timestamp for the CPU we are testing.
+ * 'lastuse_stamp' is a timestamp taken when the PFN we are testing was last
* used for a purpose that may have caused the CPU's TLB to become tainted.
*/
-#define NEED_FLUSH(_cpu_stamp, _lastuse_stamp) \
- (((_cpu_stamp) <= (_lastuse_stamp)) && \
- (((_lastuse_stamp) - (_cpu_stamp)) <= (2*GLOBAL_FLUSH_PERIOD)))
-
-extern unsigned long tlbflush_mask;
-extern unsigned long tlbflush_clock;
-extern unsigned long tlbflush_time[NR_CPUS];
+static inline int NEED_FLUSH(u32 cpu_stamp, u32 lastuse_stamp)
+{
+ /*
+ * Why does this work?
+ * 1. XOR sets high-order bits if the stamps are from differing epochs.
+ * 2. Subtraction sets high-order bits if 'cpu_stamp > lastuse_stamp'.
+ * In either case a flush is unnecessary: we therefore OR the results from
+ * (1) and (2), mask the high-order bits, and return the inverse.
+ */
+ return !(((lastuse_stamp^cpu_stamp)|(lastuse_stamp-cpu_stamp)) &
+ ~TLBCLOCK_EPOCH_MASK);
+}
+
+extern u32 tlbflush_clock;
+extern u32 tlbflush_time[NR_CPUS];
extern void new_tlbflush_clock_period(void);
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 96048d6cc7..15629e5f51 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -63,11 +63,11 @@ struct pfn_info
unsigned long _unused;
} u;
/* Reference count and various PGC_xxx flags and fields. */
- unsigned long count_and_flags;
+ u32 count_and_flags;
/* Type reference count and various PGT_xxx flags and fields. */
- unsigned long type_and_flags;
+ u32 type_and_flags;
/* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
- unsigned long tlbflush_timestamp;
+ u32 tlbflush_timestamp;
};
/* The following page types are MUTUALLY EXCLUSIVE. */
@@ -136,7 +136,7 @@ void free_page_type(struct pfn_info *page, unsigned int type);
static inline void put_page(struct pfn_info *page)
{
- unsigned long nx, x, y = page->count_and_flags;
+ u32 nx, x, y = page->count_and_flags;
do {
x = y;
@@ -152,7 +152,7 @@ static inline void put_page(struct pfn_info *page)
static inline int get_page(struct pfn_info *page,
struct task_struct *domain)
{
- unsigned long x, nx, y = page->count_and_flags;
+ u32 x, nx, y = page->count_and_flags;
struct task_struct *p, *np = page->u.domain;
do {
@@ -164,7 +164,7 @@ static inline int get_page(struct pfn_info *page,
unlikely(x & PGC_zombie) || /* Zombie? */
unlikely(p != domain) ) /* Wrong owner? */
{
- DPRINTK("Error pfn %08lx: ed=%p,sd=%p,caf=%08lx\n",
+ DPRINTK("Error pfn %08lx: ed=%p,sd=%p,caf=%08x\n",
page_to_pfn(page), domain, p, x);
return 0;
}
@@ -182,7 +182,7 @@ static inline int get_page(struct pfn_info *page,
static inline void put_page_type(struct pfn_info *page)
{
- unsigned long nx, x, y = page->type_and_flags;
+ u32 nx, x, y = page->type_and_flags;
again:
do {
@@ -214,9 +214,9 @@ static inline void put_page_type(struct pfn_info *page)
}
-static inline int get_page_type(struct pfn_info *page, unsigned long type)
+static inline int get_page_type(struct pfn_info *page, u32 type)
{
- unsigned long nx, x, y = page->type_and_flags;
+ u32 nx, x, y = page->type_and_flags;
again:
do {
x = y;
@@ -239,7 +239,7 @@ static inline int get_page_type(struct pfn_info *page, unsigned long type)
}
else if ( unlikely((x & PGT_type_mask) != type) )
{
- DPRINTK("Unexpected type (saw %08lx != exp %08lx) for pfn %08lx\n",
+ DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n",
x & PGT_type_mask, type, page_to_pfn(page));
return 0;
}
@@ -261,7 +261,7 @@ static inline int get_page_type(struct pfn_info *page, unsigned long type)
/* Try to validate page type; drop the new reference on failure. */
if ( unlikely(!alloc_page_type(page, type)) )
{
- DPRINTK("Error while validating pfn %08lx for type %08lx\n",
+ DPRINTK("Error while validating pfn %08lx for type %08x\n",
page_to_pfn(page), type);
put_page_type(page);
return 0;
@@ -282,7 +282,7 @@ static inline void put_page_and_type(struct pfn_info *page)
static inline int get_page_and_type(struct pfn_info *page,
struct task_struct *domain,
- unsigned int type)
+ u32 type)
{
int rc = get_page(page, domain);
diff --git a/xen/net/dev.c b/xen/net/dev.c
index 31d80e2a14..69ed0e399a 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -553,7 +553,7 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) |
(((unsigned long)ptep)&~PAGE_MASK) );
- // avoid the fault later
+ /* Avoid the fault later. */
*sptr = new_pte;
unmap_domain_mem(sptr);
@@ -2086,13 +2086,11 @@ static void get_rx_bufs(net_vif_t *vif)
pte_pfn = rx.addr >> PAGE_SHIFT;
pte_page = &frame_table[pte_pfn];
- //printk("MMM %08lx ", rx.addr);
-
/* The address passed down must be to a valid PTE. */
if ( unlikely(pte_pfn >= max_page) ||
unlikely(!get_page_and_type(pte_page, p, PGT_l1_page_table)) )
{
- DPRINTK("Bad page frame for ppte %llu,%08lx,%08lx,%08lx\n",
+ DPRINTK("Bad page frame for ppte %llu,%08lx,%08lx,%08x\n",
p->domain, pte_pfn, max_page, pte_page->type_and_flags);
make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
continue;
@@ -2100,7 +2098,7 @@ static void get_rx_bufs(net_vif_t *vif)
ptep = map_domain_mem(rx.addr);
pte = *ptep;
- //printk("%08lx\n",pte);
+
/* We must be passed a valid writeable mapping to swizzle. */
if ( unlikely((pte & (_PAGE_PRESENT|_PAGE_RW)) !=
(_PAGE_PRESENT|_PAGE_RW)) ||
@@ -2143,7 +2141,7 @@ static void get_rx_bufs(net_vif_t *vif)
make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
goto rx_unmap_and_continue;
- // XXX IAP should SHADOW_CONFIG do something here?
+ /* XXX IAP should SHADOW_CONFIG do something here? */
}
/*
@@ -2155,7 +2153,7 @@ static void get_rx_bufs(net_vif_t *vif)
0) !=
(PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
{
- DPRINTK("Page held more than once %08lx\n",
+ DPRINTK("Page held more than once %08x\n",
buf_page->count_and_flags);
if ( !get_page_type(buf_page, PGT_writeable_page) )
put_page(buf_page);