bitkeeper revision 1.1159.45.1 (4124d092e9_SvIhSMDUvJb9u3drP3A)

Merge ssh://xenbk@gandalf.hpl.hp.com//var/bk/xeno-unstable.bk into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk
author: iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> 2004-08-19 16:08:50 +0000
committer: iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> 2004-08-19 16:08:50 +0000
commit: 8db016d517d4f355234c48a3ba230f0c3e287015 (patch)
tree: 3c9186fc5c46d90de04e612b2f71fbbffa94e9b9
parent: b0b7188948a2da46932790ada9a8f09626dc948c (diff)
parent: 106c7d60ec845cb407381ae25e4d4aad5b26c886 (diff)
download: xen-8db016d517d4f355234c48a3ba230f0c3e287015.tar.gz
xen-8db016d517d4f355234c48a3ba230f0c3e287015.tar.bz2
xen-8db016d517d4f355234c48a3ba230f0c3e287015.zip
12 files changed, 127 insertions, 168 deletions
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
index b72d0efe11..b13e3d75ef 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
@@ -36,13 +36,16 @@ typedef struct user_balloon_op {
 } user_balloon_op_t;
 /* END OF USER DEFINE */
 
-/* Dead entry written into balloon-owned entries in the PMT. */
-#define DEAD 0xdeadbeef
-
 static struct proc_dir_entry *balloon_pde;
 unsigned long credit;
 static unsigned long current_pages, most_seen_pages;
 
+/*
+ * Dead entry written into balloon-owned entries in the PMT.
+ * It is deliberately different to INVALID_P2M_ENTRY.
+ */
+#define DEAD 0xdead1234
+
 static inline pte_t *get_ptep(unsigned long addr)
 {
     pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
@@ -79,17 +82,16 @@ static unsigned long inflate_balloon(unsigned long num_pages)
     for ( i = 0; i < num_pages; i++, currp++ )
     {
 	struct page *page = alloc_page(GFP_HIGHUSER);
-	unsigned long pfn =  page - mem_map;
+	unsigned long pfn = page - mem_map;
 
         /* If allocation fails then free all reserved pages. */
-        if ( page == 0 )
+        if ( page == NULL )
         {
-            printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.",
-                   num_pages, i);
+            printk(KERN_ERR "Unable to inflate balloon by %ld, only"
+                   " %ld pages free.", num_pages, i);
             currp = parray;
-            for(j = 0; j < i; j++, ++currp){
+            for ( j = 0; j < i; j++, currp++ )
                 __free_page((struct page *) (mem_map + *currp));
-            }
 	    ret = -EFAULT;
             goto cleanup;
         }
@@ -102,9 +104,8 @@ static unsigned long inflate_balloon(unsigned long num_pages)
     {
 	unsigned long mfn = phys_to_machine_mapping[*currp];
         curraddr = (unsigned long)page_address(mem_map + *currp);
-	if (curraddr)
+	if ( curraddr != 0 )
             queue_l1_entry_update(get_ptep(curraddr), 0);
-
         phys_to_machine_mapping[*currp] = DEAD;
         *currp = mfn;
     }
@@ -313,17 +314,18 @@ claim_new_pages(unsigned long num_pages)
     XEN_flush_page_update_queue();
     new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
                                 parray, num_pages, 0);
-    if (new_page_cnt != num_pages)
+    if ( new_page_cnt != num_pages )
     {
         printk(KERN_WARNING
             "claim_new_pages: xen granted only %lu of %lu requested pages\n",
             new_page_cnt, num_pages);
 
-	/* XXX
-	 * avoid xen lockup when user forgot to setdomainmaxmem.  xen
-	 * usually can dribble out a few pages and then hangs
+	/* 
+	 * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
+	 * usually can dribble out a few pages and then hangs.
 	 */
-	if (new_page_cnt < 1000) {
+	if ( new_page_cnt < 1000 )
+        {
             printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
 	    HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
                                 parray, new_page_cnt, 0);
@@ -331,7 +333,7 @@ claim_new_pages(unsigned long num_pages)
 	}
     }
     memcpy(phys_to_machine_mapping+most_seen_pages, parray,
-            new_page_cnt * sizeof(unsigned long));
+           new_page_cnt * sizeof(unsigned long));
 
     pagetable_extend(most_seen_pages,new_page_cnt);
 
@@ -465,12 +467,15 @@ static int __init init_module(void)
     /* 
      * make a new phys map if mem= says xen can give us memory  to grow
      */
-    if (max_pfn > start_info.nr_pages) {
+    if ( max_pfn > start_info.nr_pages )
+    {
         extern unsigned long *phys_to_machine_mapping;
         unsigned long *newmap;
         newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
-        phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping,
-            start_info.nr_pages * sizeof(unsigned long));
+        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
+        memcpy(newmap, phys_to_machine_mapping,
+               start_info.nr_pages * sizeof(unsigned long));
+        phys_to_machine_mapping = newmap;
     }
 
     return 0;
diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
index e6845abc86..9ddd30bf73 100644
--- a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
+++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
@@ -58,7 +58,19 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
  *     then we'll have p2m(m2p(MFN))==MFN.
  * If we detect a special mapping then it doesn't have a 'struct page'.
  * We force !VALID_PAGE() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ * 
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ *      require. In all the cases we care about, the high bit gets shifted out
+ *      (e.g., phys_to_machine()) so behaviour there is correct.
  */
+#define INVALID_P2M_ENTRY (~0UL)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
 #define pte_page(_pte)                                        \
 ({                                                            \
     unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT;         \
diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c
index 6f5e1b2c73..46702c5795 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c
+++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c
@@ -61,6 +61,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 			pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
 			pfn = pte->pte_low >> PAGE_SHIFT;
 			queue_l1_entry_update(pte, 0);
+			phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
+				INVALID_P2M_ENTRY;
 			flush_page_update_queue();
 			if (HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
 						  &pfn, 1, 0) != 1) BUG();
@@ -79,7 +81,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 				pfn+i, (__pa(ret)>>PAGE_SHIFT)+i);
 			phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
 				pfn+i;
-                        flush_page_update_queue();
 		}
 		flush_page_update_queue();
 	}
diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
index fc7bc3e523..957555f92a 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
+++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
@@ -299,7 +299,7 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages)
         pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); 
         pfn_array[i] = pte->pte_low >> PAGE_SHIFT;
         queue_l1_entry_update(pte, 0);
-        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = 0xdeadbeef;
+        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = INVALID_P2M_ENTRY;
     }
 
     flush_page_update_queue();
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
index f26387f305..9dc64cc0c3 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
@@ -415,7 +415,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
         mcl[i].args[3] = blkif->domid;
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
-            phys_seg[i].buffer >> PAGE_SHIFT;
+            FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT);
     }
 
     if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
index 5a3a45873f..e28274a457 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -1,5 +1,5 @@
 /******************************************************************************
- * block.c
+ * blkfront.c
  * 
  * XenLinux virtual block-device driver.
  * 
@@ -67,11 +67,12 @@ static inline int GET_ID_FROM_FREELIST( void )
 {
     unsigned long free = rec_ring_free;
 
-    if(free>BLKIF_RING_SIZE) BUG();
+    if ( free > BLKIF_RING_SIZE )
+        BUG();
 
     rec_ring_free = rec_ring[free].id;
 
-    rec_ring[free].id = 0x0fffffee; // debug
+    rec_ring[free].id = 0x0fffffee; /* debug */
 
     return free;
 }
@@ -253,8 +254,6 @@ static int blkif_queue_request(struct request *req)
     id = GET_ID_FROM_FREELIST();
     rec_ring[id].id = (unsigned long) req;
 
-//printk(KERN_ALERT"r: %d req %p (%ld)\n",req_prod,req,id);
-
     ring_req->id = id;
     ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
         BLKIF_OP_READ;
@@ -300,8 +299,6 @@ void do_blkif_request(request_queue_t *rq)
 
     DPRINTK("Entered do_blkif_request\n"); 
 
-//printk(KERN_ALERT"r: %d req\n",req_prod);
-
     queued = 0;
 
     while ((req = elv_next_request(rq)) != NULL) {
@@ -310,7 +307,8 @@ void do_blkif_request(request_queue_t *rq)
             continue;
         }
 
-        if (BLKIF_RING_FULL) {
+        if ( BLKIF_RING_FULL )
+        {
             blk_stop_queue(rq);
             break;
         }
@@ -358,11 +356,9 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 	id = bret->id;
 	req = (struct request *)rec_ring[id].id;
 
-//printk(KERN_ALERT"i: %d req %p (%ld)\n",i,req,id);
-
 	blkif_completion( &rec_ring[id] );
 
-	ADD_ID_TO_FREELIST(id);  // overwrites req
+	ADD_ID_TO_FREELIST(id); /* overwrites req */
 
         switch ( bret->operation )
         {
@@ -772,8 +768,6 @@ static int blkif_queue_request(unsigned long   id,
     req->nr_segments   = 1;
     req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
 
-//printk("N: %d req %p (%ld)\n",req_prod,rec_ring[xid].id,xid);
-
     req_prod++;
 
     /* Keep a private copy so we can reissue requests when recovering. */    
@@ -892,8 +886,6 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 	id = bret->id;
 	bh = (struct buffer_head *)rec_ring[id].id; 
 
-//printk("i: %d req %p (%ld)\n",i,bh,id);
-
 	blkif_completion( &rec_ring[id] );
 
 	ADD_ID_TO_FREELIST(id);
@@ -942,16 +934,11 @@ static inline void translate_req_to_pfn(blkif_request_t *xreq,
     xreq->operation     = req->operation;
     xreq->nr_segments   = req->nr_segments;
     xreq->device        = req->device;
-    // preserve id
+    /* preserve id */
     xreq->sector_number = req->sector_number;
 
     for ( i = 0; i < req->nr_segments; i++ )
-    {
-        xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) |
-            (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
-             PAGE_SHIFT);
-    }
-    
+        xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
 }
 
 static inline void translate_req_to_mfn(blkif_request_t *xreq,
@@ -962,15 +949,11 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq,
     xreq->operation     = req->operation;
     xreq->nr_segments   = req->nr_segments;
     xreq->device        = req->device;
-    xreq->id            = req->id;   // copy id (unlike above)
+    xreq->id            = req->id;   /* copy id (unlike above) */
     xreq->sector_number = req->sector_number;
 
     for ( i = 0; i < req->nr_segments; i++ )
-    {
-        xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) |
-            (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << 
-             PAGE_SHIFT);
-    }
+        xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
 }
 
 
@@ -978,7 +961,6 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq,
 static inline void flush_requests(void)
 {
     DISABLE_SCATTERGATHER();
-//printk(KERN_ALERT"flush %d\n",req_prod);
     wmb(); /* Ensure that the frontend can see the requests. */
     blk_ring->req_prod = req_prod;
     notify_via_evtchn(blkif_evtchn);
@@ -1010,8 +992,6 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
     blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
     rec_ring[id].id = (unsigned long) req;
 
-//printk("c: %d req %p (%ld)\n",req_prod,req,id);
-
     translate_req_to_pfn( &rec_ring[id], req );
 
     req_prod++;
@@ -1094,13 +1074,13 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
                    " in state %d\n", blkif_state);
             break;
         }
+
         blkif_evtchn = status->evtchn;
-        blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-        if ( (rc=request_irq(blkif_irq, blkif_int, 
-                          SA_SAMPLE_RANDOM, "blkif", NULL)) )
-	{
+        blkif_irq    = bind_evtchn_to_irq(blkif_evtchn);
+
+        if ( (rc = request_irq(blkif_irq, blkif_int, 
+                               SA_SAMPLE_RANDOM, "blkif", NULL)) )
 	    printk(KERN_ALERT"blkfront request_irq failed (%ld)\n",rc);
-	}
 
         if ( recovery )
         {
@@ -1109,31 +1089,28 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
 	    /* Hmm, requests might be re-ordered when we re-issue them.
 	       This will need to be fixed once we have barriers */
 
-	    // req_prod = 0;   : already is zero
-
-	    // stage 1 : find active and move to safety
-	    for ( i=0; i <BLKIF_RING_SIZE; i++ )
+	    /* Stage 1 : Find active and move to safety. */
+	    for ( i = 0; i < BLKIF_RING_SIZE; i++ )
 	    {
 		if ( rec_ring[i].id >= PAGE_OFFSET )
 		{
 		    translate_req_to_mfn(
-			&blk_ring->ring[req_prod].req, &rec_ring[i] );
-
+			&blk_ring->ring[req_prod].req, &rec_ring[i]);
 		    req_prod++;
 		}
 	    }
 
-printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
+            printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
 	    
-	    // stage 2 : set up shadow list
-	    for ( i=0; i<req_prod; i++ )
+            /* Stage 2 : Set up shadow list. */
+	    for ( i = 0; i < req_prod; i++ )
 	    {
 		rec_ring[i].id = blk_ring->ring[i].req.id;		
 		blk_ring->ring[i].req.id = i;
-		translate_req_to_pfn( &rec_ring[i], &blk_ring->ring[i].req );
+		translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
 	    }
 
-	    // stage 3 : set up free list
+	    /* Stage 3 : Set up free list. */
 	    for ( ; i < BLKIF_RING_SIZE; i++ )
 		rec_ring[i].id = i+1;
 	    rec_ring_free = req_prod;
@@ -1150,9 +1127,6 @@ printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
 
             /* Kicks things back into life. */
             flush_requests();
-
-
-
         }
         else
         {
@@ -1270,7 +1244,7 @@ void blkdev_resume(void)
 
 /* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
 
-void blkif_completion( blkif_request_t *req )
+void blkif_completion(blkif_request_t *req)
 {
     int i;
 
@@ -1281,10 +1255,8 @@ void blkif_completion( blkif_request_t *req )
 	{
 	    unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
 	    unsigned long mfn = phys_to_machine_mapping[pfn];
-
 	    queue_machphys_update(mfn, pfn);
 	}
-
 	break;
     }
     
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
index 23b0f87130..009012c9f6 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
@@ -204,6 +204,12 @@ static void net_rx_action(unsigned long unused)
         mdata   = virt_to_machine(vdata);
         new_mfn = get_new_mfn();
         
+        /*
+         * Set the new P2M table entry before reassigning the old data page.
+         * Heed the comment in pgtable-2level.h:pte_page(). :-)
+         */
+        phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
+        
         mmu[0].ptr  = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
         mmu[0].val  = __pa(vdata) >> PAGE_SHIFT;  
         mmu[1].ptr  = MMU_EXTENDED_COMMAND;
@@ -250,8 +256,6 @@ static void net_rx_action(unsigned long unused)
         mdata   = ((mmu[2].ptr & PAGE_MASK) |
                    ((unsigned long)skb->data & ~PAGE_MASK));
         
-        phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
-        
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
         skb_shinfo(skb)->frag_list = NULL;
@@ -556,7 +560,7 @@ static void net_tx_action(unsigned long unused)
         }
 
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
-            txreq.addr >> PAGE_SHIFT;
+            FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
 
         __skb_put(skb, PKT_PROT_LEN);
         memcpy(skb->data, 
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
index b2b63441d5..0011273abd 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
@@ -263,9 +263,9 @@ static void network_alloc_rx_buffers(struct net_device *dev)
         
         rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT;
 
-	/* remove this page from pseudo phys map (migration optimization) */
+	/* Remove this page from pseudo phys map before passing back to Xen. */
 	phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] 
-	    = 0x80000001;
+	    = INVALID_P2M_ENTRY;
 
         rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
         rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
@@ -478,15 +478,6 @@ static int netif_poll(struct net_device *dev, int *pbudget)
         mcl->args[2] = 0;
         mcl++;
         (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-
-#if 0 
-	if (unlikely(rx_mcl[0].args[5] != 0))
-	    printk(KERN_ALERT"Hypercall0 failed %u\n",np->rx->resp_prod);
-
-	if (unlikely(rx_mcl[1].args[5] != 0))
-	    printk(KERN_ALERT"Hypercall1 failed %u\n",np->rx->resp_prod);
-#endif
-
     }
 
     while ( (skb = __skb_dequeue(&rxq)) != NULL )
diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
index 760569f95d..f30bd2b83d 100644
--- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
+++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
@@ -88,30 +88,33 @@ static inline pte_t ptep_get_and_clear(pte_t *xp)
  *     not have MFN in our p2m table. Conversely, if the page is ours,
  *     then we'll have p2m(m2p(MFN))==MFN.
  * If we detect a special mapping then it doesn't have a 'struct page'.
- * We force !VALID_PAGE() by returning an out-of-range pointer.
+ * We force !pfn_valid() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
+ * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ * 
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ *      require. In all the cases we care about, the high bit gets shifted out
+ *      (e.g., phys_to_machine()) so behaviour there is correct.
  */
-#define pte_page(_pte)                                        \
-({                                                            \
-    unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT;         \
-    unsigned long pfn = mfn_to_pfn(mfn);                      \
-    if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) )     \
-        pfn = max_mapnr; /* special: force !VALID_PAGE() */   \
-    pfn_to_page(pfn);                                         \
-})
-
-#define pte_none(x)		(!(x).pte_low)
-/* See comments above pte_page */
-/* XXXcl check pte_present because msync.c:filemap_sync_pte calls
- * without pte_present check */
+#define INVALID_P2M_ENTRY (~0UL)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
 #define pte_pfn(_pte)                                                   \
 ({                                                                      \
     unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT;                   \
-    unsigned long pfn = pte_present(_pte) ? mfn_to_pfn(mfn) : mfn;      \
+    unsigned long pfn = mfn_to_pfn(mfn);                                \
     if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) )               \
         pfn = max_mapnr; /* special: force !pfn_valid() */              \
     pfn;                                                                \
 })
 
+#define pte_page(_pte) pfn_to_page(pte_pfn(_pte))
+
+#define pte_none(x)		(!(x).pte_low)
+
 #define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 #define pfn_pte_ma(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 #define pfn_pmd(pfn, prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
index 5a47b30f56..6e4bb55b12 100644
--- a/tools/libxc/xc_linux_save.c
+++ b/tools/libxc/xc_linux_save.c
@@ -295,7 +295,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
     int rc = 1, i, j, k, last_iter, iter = 0;
     unsigned long mfn;
     u32 domid = ioctxt->domain;
-    int live = 0; // (ioctxt->flags & XCFLAGS_LIVE);
+    int live =  (ioctxt->flags & XCFLAGS_LIVE);
     int debug = (ioctxt->flags & XCFLAGS_DEBUG);
     int sent_last_iter, skip_this_iter;
 
@@ -440,7 +440,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
 
     /* Domain is still running at this point */
 
-    if( live ){ 
+    if( live ){
+printf("GO LIVE!!\n");
         if ( xc_shadow_control( xc_handle, domid, 
                                 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
                                 NULL, 0, NULL ) < 0 ) {
diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
index 362b0f4560..06445943ab 100644
--- a/xen/arch/x86/shadow.c
+++ b/xen/arch/x86/shadow.c
@@ -29,41 +29,6 @@ hypercall lock anyhow (at least initially).
 ********/
 
 
-/**
-
-FIXME:
-
-The shadow table flush command is dangerous on SMP systems as the
-guest may be using the L2 on one CPU while the other is trying to 
-blow the table away. 
-
-The current save restore code works around this by not calling FLUSH,
-but by calling CLEAN2 which leaves all L2s in tact (this is probably
-quicker anyhow).
-
-Even so, we have to be very careful. The flush code may need to cause
-a TLB flush on another CPU. It needs to do this while holding the
-shadow table lock. The trouble is, the guest may be in the shadow page
-fault handler spinning waiting to grab the shadow lock. It may have
-intterupts disabled, hence we can't use the normal flush_tlb_cpu
-mechanism.
-
-For the moment, we have a grim race whereby the spinlock in the shadow
-fault handler is actually a try lock, in a loop with a helper for the
-tlb flush code.
-
-A better soloution would be to take a new flush lock, then raise a
-per-domain soft irq on the other CPU.  The softirq will switch to
-init's PTs, then do an atomic inc of a variable to count himself in,
-then spin on a lock.  Having noticed that the other guy has counted
-in, flush the shadow table, then release him by dropping the lock. He
-will then reload cr3 from mm.page_table on the way out of the softirq.
-
-In domian-softirq context we know that the guy holds no locks and has
-interrupts enabled. Nothing can go wrong ;-)
-
-**/
-
 static inline void free_shadow_page(struct mm_struct *m, 
                                     struct pfn_info *page)
 {
@@ -381,9 +346,9 @@ static int shadow_mode_table_op(struct domain *d,
 		d->mm.shadow_dirty_net_count   = 0;
 		d->mm.shadow_dirty_block_count = 0;
 	
-		sc->pages = d->tot_pages;
+		sc->pages = d->max_pages;
 
-		if( d->tot_pages > sc->pages || 
+		if( d->max_pages > sc->pages || 
 			!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
 		{
 			rc = -EINVAL;
@@ -393,10 +358,10 @@ static int shadow_mode_table_op(struct domain *d,
 	
 #define chunk (8*1024) // do this in 1KB chunks for L1 cache
 	
-		for(i=0;i<d->tot_pages;i+=chunk)
+		for(i=0;i<d->max_pages;i+=chunk)
 		{
-			int bytes = ((  ((d->tot_pages-i) > (chunk))?
-							(chunk):(d->tot_pages-i) ) + 7) / 8;
+			int bytes = ((  ((d->max_pages-i) > (chunk))?
+							(chunk):(d->max_pages-i) ) + 7) / 8;
 	    
 			copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
 						  d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
@@ -428,21 +393,21 @@ static int shadow_mode_table_op(struct domain *d,
 		sc->stats.dirty_net_count   = d->mm.shadow_dirty_net_count;
 		sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
 	
-		if( d->tot_pages > sc->pages || 
+		if( d->max_pages > sc->pages || 
 			!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
 		{
 			rc = -EINVAL;
 			goto out;
 		}
 	
-		sc->pages = d->tot_pages;
+		sc->pages = d->max_pages;
 	
 #define chunk (8*1024) // do this in 1KB chunks for L1 cache
 	
-		for(i=0;i<d->tot_pages;i+=chunk)
+		for(i=0;i<d->max_pages;i+=chunk)
 		{
-			int bytes = ((  ((d->tot_pages-i) > (chunk))?
-							(chunk):(d->tot_pages-i) ) + 7) / 8;
+			int bytes = ((  ((d->max_pages-i) > (chunk))?
+							(chunk):(d->max_pages-i) ) + 7) / 8;
 	    
 			copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
 						  d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
@@ -475,7 +440,13 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
     unsigned int cmd = sc->op;
     int rc = 0;
 
-    spin_lock(&d->mm.shadow_lock);
+	if (d == current)
+		printk("Attempt to control your _own_ shadow tables. I hope you know what you're doing!\n");
+
+	domain_pause(d);
+	synchronise_pagetables(d->processor);
+
+	spin_lock(&d->mm.shadow_lock);
 
     if ( cmd == DOM0_SHADOW_CONTROL_OP_OFF )
     {
@@ -502,10 +473,10 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
         rc = -EINVAL;
     }
 
-	flush_tlb_cpu(d->processor);
-   
     spin_unlock(&d->mm.shadow_lock);
 
+	domain_unpause(d);
+
     return rc;
 }
 
@@ -518,6 +489,7 @@ static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
 void unshadow_table( unsigned long gpfn, unsigned int type )
 {
     unsigned long spfn;
+	struct domain *d = frame_table[gpfn].u.inuse.domain;
 
     SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
             type,
@@ -530,11 +502,11 @@ void unshadow_table( unsigned long gpfn, unsigned int type )
     // even in the SMP guest case, there won't be a race here as
     // this CPU was the one that cmpxchg'ed the page to invalid
 
-    spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
+    spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;
 
-    delete_shadow_status(&current->mm, gpfn);
+    delete_shadow_status(&d->mm, gpfn);
 
-    free_shadow_page( &current->mm, &frame_table[spfn] );
+    free_shadow_page(&d->mm, &frame_table[spfn] );
 
 }
 
@@ -651,15 +623,7 @@ int shadow_fault( unsigned long va, long error_code )
 
     // take the lock and reread gpte
 
-    while( unlikely(!spin_trylock(&current->mm.shadow_lock)) )
-	{
-		extern volatile unsigned long flush_cpumask;
-		if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
-			local_flush_tlb();
-		rep_nop();
-	}
-	
-	ASSERT(spin_is_locked(&current->mm.shadow_lock));
+	spin_lock(&current->mm.shadow_lock);
 	
     if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
     {
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index a57d43b9c3..b3b056fd68 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -422,7 +422,7 @@ void free_domheap_pages(struct pfn_info *pg, int order)
         drop_dom_ref = (d->xenheap_pages == 0);
         spin_unlock_recursive(&d->page_alloc_lock);
     }
-    else
+    else if ( likely(d != NULL) )
     {
         /* NB. May recursively lock from domain_relinquish_memory(). */
         spin_lock_recursive(&d->page_alloc_lock);
@@ -442,6 +442,12 @@ void free_domheap_pages(struct pfn_info *pg, int order)
 
         free_heap_pages(MEMZONE_DOM, pg, order);
     }
+    else
+    {
+        /* Freeing an anonymous domain-heap page. */
+        free_heap_pages(MEMZONE_DOM, pg, order);
+        drop_dom_ref = 0;
+    }
 
     if ( drop_dom_ref )
         put_domain(d);
author	iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>	2004-08-19 16:08:50 +0000
committer	iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>	2004-08-19 16:08:50 +0000
commit	8db016d517d4f355234c48a3ba230f0c3e287015 (patch)
tree	3c9186fc5c46d90de04e612b2f71fbbffa94e9b9
parent	b0b7188948a2da46932790ada9a8f09626dc948c (diff)
parent	106c7d60ec845cb407381ae25e4d4aad5b26c886 (diff)
download	xen-8db016d517d4f355234c48a3ba230f0c3e287015.tar.gz xen-8db016d517d4f355234c48a3ba230f0c3e287015.tar.bz2 xen-8db016d517d4f355234c48a3ba230f0c3e287015.zip