author     iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-08-19 16:08:50 +0000
committer  iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>  2004-08-19 16:08:50 +0000
commit     8db016d517d4f355234c48a3ba230f0c3e287015 (patch)
tree       3c9186fc5c46d90de04e612b2f71fbbffa94e9b9
parent     b0b7188948a2da46932790ada9a8f09626dc948c (diff)
parent     106c7d60ec845cb407381ae25e4d4aad5b26c886 (diff)
bitkeeper revision 1.1159.45.1 (4124d092e9_SvIhSMDUvJb9u3drP3A)
Merge ssh://xenbk@gandalf.hpl.hp.com//var/bk/xeno-unstable.bk into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c        | 45
-rw-r--r--  linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h          | 12
-rw-r--r--  linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c             |  3
-rw-r--r--  linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c              |  2
-rw-r--r--  linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c              |  2
-rw-r--r--  linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c            | 78
-rw-r--r--  linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c              | 10
-rw-r--r--  linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c            | 13
-rw-r--r--  linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h  | 33
-rw-r--r--  tools/libxc/xc_linux_save.c                                       |  5
-rw-r--r--  xen/arch/x86/shadow.c                                             | 84
-rw-r--r--  xen/common/page_alloc.c                                           |  8
12 files changed, 127 insertions, 168 deletions
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
index b72d0efe11..b13e3d75ef 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c
@@ -36,13 +36,16 @@ typedef struct user_balloon_op {
} user_balloon_op_t;
/* END OF USER DEFINE */
-/* Dead entry written into balloon-owned entries in the PMT. */
-#define DEAD 0xdeadbeef
-
static struct proc_dir_entry *balloon_pde;
unsigned long credit;
static unsigned long current_pages, most_seen_pages;
+/*
+ * Dead entry written into balloon-owned entries in the PMT.
+ * It is deliberately different to INVALID_P2M_ENTRY.
+ */
+#define DEAD 0xdead1234
+
static inline pte_t *get_ptep(unsigned long addr)
{
pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
@@ -79,17 +82,16 @@ static unsigned long inflate_balloon(unsigned long num_pages)
for ( i = 0; i < num_pages; i++, currp++ )
{
struct page *page = alloc_page(GFP_HIGHUSER);
- unsigned long pfn = page - mem_map;
+ unsigned long pfn = page - mem_map;
/* If allocation fails then free all reserved pages. */
- if ( page == 0 )
+ if ( page == NULL )
{
- printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.",
- num_pages, i);
+ printk(KERN_ERR "Unable to inflate balloon by %ld, only"
+ " %ld pages free.", num_pages, i);
currp = parray;
- for(j = 0; j < i; j++, ++currp){
+ for ( j = 0; j < i; j++, currp++ )
__free_page((struct page *) (mem_map + *currp));
- }
ret = -EFAULT;
goto cleanup;
}
@@ -102,9 +104,8 @@ static unsigned long inflate_balloon(unsigned long num_pages)
{
unsigned long mfn = phys_to_machine_mapping[*currp];
curraddr = (unsigned long)page_address(mem_map + *currp);
- if (curraddr)
+ if ( curraddr != 0 )
queue_l1_entry_update(get_ptep(curraddr), 0);
-
phys_to_machine_mapping[*currp] = DEAD;
*currp = mfn;
}
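
Note: in outline, the inflate path above boils down to the following per-page sequence. This is a simplified sketch (the helper name is illustrative; in the real driver the unmap/DEAD-marking happens in a second pass over parray, and the collected MFNs are handed back afterwards via HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, ...)):

/* Sketch: release one page to Xen, as done by inflate_balloon(). */
static int balloon_release_one_page(unsigned long *mfn_slot)
{
    struct page *page = alloc_page(GFP_HIGHUSER);
    unsigned long pfn, vaddr;

    if ( page == NULL )
        return -ENOMEM;

    pfn   = page - mem_map;
    vaddr = (unsigned long)page_address(page);

    /* Unmap the page if it has a lowmem mapping... */
    if ( vaddr != 0 )
        queue_l1_entry_update(get_ptep(vaddr), 0);

    /* ...mark its p2m slot as balloon-owned (DEAD, not INVALID_P2M_ENTRY)... */
    *mfn_slot = phys_to_machine_mapping[pfn];
    phys_to_machine_mapping[pfn] = DEAD;

    /* ...and report the MFN so the caller can return it to Xen. */
    return 0;
}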
@@ -313,17 +314,18 @@ claim_new_pages(unsigned long num_pages)
XEN_flush_page_update_queue();
new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
parray, num_pages, 0);
- if (new_page_cnt != num_pages)
+ if ( new_page_cnt != num_pages )
{
printk(KERN_WARNING
"claim_new_pages: xen granted only %lu of %lu requested pages\n",
new_page_cnt, num_pages);
- /* XXX
- * avoid xen lockup when user forgot to setdomainmaxmem. xen
- * usually can dribble out a few pages and then hangs
+ /*
+ * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
+ * usually can dribble out a few pages and then hangs.
*/
- if (new_page_cnt < 1000) {
+ if ( new_page_cnt < 1000 )
+ {
printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
parray, new_page_cnt, 0);
@@ -331,7 +333,7 @@ claim_new_pages(unsigned long num_pages)
}
}
memcpy(phys_to_machine_mapping+most_seen_pages, parray,
- new_page_cnt * sizeof(unsigned long));
+ new_page_cnt * sizeof(unsigned long));
pagetable_extend(most_seen_pages,new_page_cnt);
@@ -465,12 +467,15 @@ static int __init init_module(void)
/*
* make a new phys map if mem= says xen can give us memory to grow
*/
- if (max_pfn > start_info.nr_pages) {
+ if ( max_pfn > start_info.nr_pages )
+ {
extern unsigned long *phys_to_machine_mapping;
unsigned long *newmap;
newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
- phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping,
- start_info.nr_pages * sizeof(unsigned long));
+ memset(newmap, ~0, max_pfn * sizeof(unsigned long));
+ memcpy(newmap, phys_to_machine_mapping,
+ start_info.nr_pages * sizeof(unsigned long));
+ phys_to_machine_mapping = newmap;
}
return 0;
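
Note: the new initialisation above follows a simple pattern when mem= allows the domain to grow: size the p2m map for max_pfn, mark every slot invalid, then copy in the entries that are currently backed. A condensed sketch, with an illustrative helper name and an added NULL check:

/* Sketch: grow the phys-to-machine map to cover new_pages entries.
 * Slots beyond the current reservation start out as INVALID_P2M_ENTRY. */
static unsigned long *grow_p2m_map(unsigned long *old, unsigned long old_pages,
                                   unsigned long new_pages)
{
    unsigned long *newmap = vmalloc(new_pages * sizeof(unsigned long));
    if ( newmap == NULL )
        return old;                           /* keep the old map on failure */
    memset(newmap, ~0, new_pages * sizeof(unsigned long));   /* all invalid  */
    memcpy(newmap, old, old_pages * sizeof(unsigned long));
    return newmap;
}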
diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
index e6845abc86..9ddd30bf73 100644
--- a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
+++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
@@ -58,7 +58,19 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
* then we'll have p2m(m2p(MFN))==MFN.
* If we detect a special mapping then it doesn't have a 'struct page'.
* We force !VALID_PAGE() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
 * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ *
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ * require. In all the cases we care about, the high bit gets shifted out
+ * (e.g., phys_to_machine()) so behaviour there is correct.
*/
+#define INVALID_P2M_ENTRY (~0UL)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_page(_pte) \
({ \
unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c
index 6f5e1b2c73..46702c5795 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c
+++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c
@@ -61,6 +61,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
pfn = pte->pte_low >> PAGE_SHIFT;
queue_l1_entry_update(pte, 0);
+ phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
+ INVALID_P2M_ENTRY;
flush_page_update_queue();
if (HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
&pfn, 1, 0) != 1) BUG();
@@ -79,7 +81,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
pfn+i, (__pa(ret)>>PAGE_SHIFT)+i);
phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
pfn+i;
- flush_page_update_queue();
}
flush_page_update_queue();
}
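
Note: the hunk above completes the exchange dance in dma_alloc_coherent(): every frame handed back to Xen must first disappear from the p2m table, and the replacement frames are entered once the new contiguous extent is mapped. A compressed per-page sketch (illustrative helper name; protection bits, the multicall batching and error handling are simplified):

/* Sketch: swap one pseudo-physical page's backing machine frame. */
static void dma_swap_frame(unsigned long va, unsigned long pfn,
                           unsigned long new_mfn)
{
    pte_t *pte = pte_offset_kernel(pmd_offset(pgd_offset_k(va), va), va);
    unsigned long old_mfn = pte->pte_low >> PAGE_SHIFT;

    /* Give the old frame back: unmap it and invalidate its p2m slot first. */
    queue_l1_entry_update(pte, 0);
    phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
    flush_page_update_queue();
    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, &old_mfn, 1, 0) != 1 )
        BUG();

    /* Map the replacement frame and re-enter it in the p2m table. */
    queue_l1_entry_update(pte, (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL);
    phys_to_machine_mapping[pfn] = new_mfn;
    flush_page_update_queue();
}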
diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
index fc7bc3e523..957555f92a 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
+++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
@@ -299,7 +299,7 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages)
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
pfn_array[i] = pte->pte_low >> PAGE_SHIFT;
queue_l1_entry_update(pte, 0);
- phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = 0xdeadbeef;
+ phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = INVALID_P2M_ENTRY;
}
flush_page_update_queue();
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
index f26387f305..9dc64cc0c3 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c
@@ -415,7 +415,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
mcl[i].args[3] = blkif->domid;
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
- phys_seg[i].buffer >> PAGE_SHIFT;
+ FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT);
}
if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) )
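
Note: recording the guest's buffer frame with FOREIGN_FRAME() keeps the backend's own p2m table honest: the frame belongs to the other domain, so a later pte_page()/pte_pfn() on this mapping must not treat it as local. A minimal sketch of the recording step used here and in netback (illustrative helper; the actual mapping is done via the update_va_mapping_otherdomain multicall built in dispatch_rw_block_io() above):

/* Sketch: after mapping a frame owned by another domain at 'vaddr',
 * tag its p2m entry so the ownership checks in pgtable-2level.h fail. */
static inline void record_foreign_frame(unsigned long vaddr,
                                        unsigned long buffer_ma)
{
    phys_to_machine_mapping[__pa(vaddr) >> PAGE_SHIFT] =
        FOREIGN_FRAME(buffer_ma >> PAGE_SHIFT);
}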
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
index 5a3a45873f..e28274a457 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -1,5 +1,5 @@
/******************************************************************************
- * block.c
+ * blkfront.c
*
* XenLinux virtual block-device driver.
*
@@ -67,11 +67,12 @@ static inline int GET_ID_FROM_FREELIST( void )
{
unsigned long free = rec_ring_free;
- if(free>BLKIF_RING_SIZE) BUG();
+ if ( free > BLKIF_RING_SIZE )
+ BUG();
rec_ring_free = rec_ring[free].id;
- rec_ring[free].id = 0x0fffffee; // debug
+ rec_ring[free].id = 0x0fffffee; /* debug */
return free;
}
@@ -253,8 +254,6 @@ static int blkif_queue_request(struct request *req)
id = GET_ID_FROM_FREELIST();
rec_ring[id].id = (unsigned long) req;
-//printk(KERN_ALERT"r: %d req %p (%ld)\n",req_prod,req,id);
-
ring_req->id = id;
ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
BLKIF_OP_READ;
@@ -300,8 +299,6 @@ void do_blkif_request(request_queue_t *rq)
DPRINTK("Entered do_blkif_request\n");
-//printk(KERN_ALERT"r: %d req\n",req_prod);
-
queued = 0;
while ((req = elv_next_request(rq)) != NULL) {
@@ -310,7 +307,8 @@ void do_blkif_request(request_queue_t *rq)
continue;
}
- if (BLKIF_RING_FULL) {
+ if ( BLKIF_RING_FULL )
+ {
blk_stop_queue(rq);
break;
}
@@ -358,11 +356,9 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
id = bret->id;
req = (struct request *)rec_ring[id].id;
-//printk(KERN_ALERT"i: %d req %p (%ld)\n",i,req,id);
-
blkif_completion( &rec_ring[id] );
- ADD_ID_TO_FREELIST(id); // overwrites req
+ ADD_ID_TO_FREELIST(id); /* overwrites req */
switch ( bret->operation )
{
@@ -772,8 +768,6 @@ static int blkif_queue_request(unsigned long id,
req->nr_segments = 1;
req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
-//printk("N: %d req %p (%ld)\n",req_prod,rec_ring[xid].id,xid);
-
req_prod++;
/* Keep a private copy so we can reissue requests when recovering. */
@@ -892,8 +886,6 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
id = bret->id;
bh = (struct buffer_head *)rec_ring[id].id;
-//printk("i: %d req %p (%ld)\n",i,bh,id);
-
blkif_completion( &rec_ring[id] );
ADD_ID_TO_FREELIST(id);
@@ -942,16 +934,11 @@ static inline void translate_req_to_pfn(blkif_request_t *xreq,
xreq->operation = req->operation;
xreq->nr_segments = req->nr_segments;
xreq->device = req->device;
- // preserve id
+ /* preserve id */
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ )
- {
- xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) |
- (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
- PAGE_SHIFT);
- }
-
+ xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
}
static inline void translate_req_to_mfn(blkif_request_t *xreq,
@@ -962,15 +949,11 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq,
xreq->operation = req->operation;
xreq->nr_segments = req->nr_segments;
xreq->device = req->device;
- xreq->id = req->id; // copy id (unlike above)
+ xreq->id = req->id; /* copy id (unlike above) */
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ )
- {
- xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) |
- (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] <<
- PAGE_SHIFT);
- }
+ xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
}
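
Note: the two translate helpers above now lean on address-level conversion helpers instead of open-coding the frame lookup. Judging from the code they replace, those helpers presumably have the following shape in the asm-xen headers (a sketch of the assumed definitions; they preserve the in-page offset, which for frame_and_sects carries the sector bits):

/* Assumed shape of the address translation helpers used above. */
static inline unsigned long phys_to_machine(unsigned long phys)
{
    unsigned long machine = phys_to_machine_mapping[phys >> PAGE_SHIFT];
    return (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
}

static inline unsigned long machine_to_phys(unsigned long machine)
{
    unsigned long phys = machine_to_phys_mapping[machine >> PAGE_SHIFT];
    return (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
}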
@@ -978,7 +961,6 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq,
static inline void flush_requests(void)
{
DISABLE_SCATTERGATHER();
-//printk(KERN_ALERT"flush %d\n",req_prod);
wmb(); /* Ensure that the frontend can see the requests. */
blk_ring->req_prod = req_prod;
notify_via_evtchn(blkif_evtchn);
@@ -1010,8 +992,6 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
rec_ring[id].id = (unsigned long) req;
-//printk("c: %d req %p (%ld)\n",req_prod,req,id);
-
translate_req_to_pfn( &rec_ring[id], req );
req_prod++;
@@ -1094,13 +1074,13 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
" in state %d\n", blkif_state);
break;
}
+
blkif_evtchn = status->evtchn;
- blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
- if ( (rc=request_irq(blkif_irq, blkif_int,
- SA_SAMPLE_RANDOM, "blkif", NULL)) )
- {
+ blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+
+ if ( (rc = request_irq(blkif_irq, blkif_int,
+ SA_SAMPLE_RANDOM, "blkif", NULL)) )
printk(KERN_ALERT"blkfront request_irq failed (%ld)\n",rc);
- }
if ( recovery )
{
@@ -1109,31 +1089,28 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
/* Hmm, requests might be re-ordered when we re-issue them.
This will need to be fixed once we have barriers */
- // req_prod = 0; : already is zero
-
- // stage 1 : find active and move to safety
- for ( i=0; i <BLKIF_RING_SIZE; i++ )
+ /* Stage 1 : Find active and move to safety. */
+ for ( i = 0; i < BLKIF_RING_SIZE; i++ )
{
if ( rec_ring[i].id >= PAGE_OFFSET )
{
translate_req_to_mfn(
- &blk_ring->ring[req_prod].req, &rec_ring[i] );
-
+ &blk_ring->ring[req_prod].req, &rec_ring[i]);
req_prod++;
}
}
-printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
+ printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
- // stage 2 : set up shadow list
- for ( i=0; i<req_prod; i++ )
+ /* Stage 2 : Set up shadow list. */
+ for ( i = 0; i < req_prod; i++ )
{
rec_ring[i].id = blk_ring->ring[i].req.id;
blk_ring->ring[i].req.id = i;
- translate_req_to_pfn( &rec_ring[i], &blk_ring->ring[i].req );
+ translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
}
- // stage 3 : set up free list
+ /* Stage 3 : Set up free list. */
for ( ; i < BLKIF_RING_SIZE; i++ )
rec_ring[i].id = i+1;
rec_ring_free = req_prod;
@@ -1150,9 +1127,6 @@ printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
/* Kicks things back into life. */
flush_requests();
-
-
-
}
else
{
@@ -1270,7 +1244,7 @@ void blkdev_resume(void)
/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
-void blkif_completion( blkif_request_t *req )
+void blkif_completion(blkif_request_t *req)
{
int i;
@@ -1281,10 +1255,8 @@ void blkif_completion( blkif_request_t *req )
{
unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
unsigned long mfn = phys_to_machine_mapping[pfn];
-
queue_machphys_update(mfn, pfn);
}
-
break;
}
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
index 23b0f87130..009012c9f6 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
@@ -204,6 +204,12 @@ static void net_rx_action(unsigned long unused)
mdata = virt_to_machine(vdata);
new_mfn = get_new_mfn();
+ /*
+ * Set the new P2M table entry before reassigning the old data page.
+ * Heed the comment in pgtable-2level.h:pte_page(). :-)
+ */
+ phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
+
mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu[0].val = __pa(vdata) >> PAGE_SHIFT;
mmu[1].ptr = MMU_EXTENDED_COMMAND;
@@ -250,8 +256,6 @@ static void net_rx_action(unsigned long unused)
mdata = ((mmu[2].ptr & PAGE_MASK) |
((unsigned long)skb->data & ~PAGE_MASK));
- phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
-
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
skb_shinfo(skb)->frag_list = NULL;
@@ -556,7 +560,7 @@ static void net_tx_action(unsigned long unused)
}
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
- txreq.addr >> PAGE_SHIFT;
+ FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
__skb_put(skb, PKT_PROT_LEN);
memcpy(skb->data,
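
Note: the ordering the new comment insists on matters because the p2m entry is exactly what pte_page()/pte_pfn() consult: the freshly allocated frame must be visible in the table before the guest's old data page is reassigned, otherwise there is a window in which the p2m still names a frame we no longer own. A compressed sketch of the receive-side swap (illustrative wrapper; the MMU command details are as in the hunk above):

/* Sketch: swap a fresh machine frame in under skb->data (rx path). */
static void rx_swap_frame(struct sk_buff *skb, mmu_update_t *mmu)
{
    unsigned long new_mfn = get_new_mfn();

    /* 1. Publish the new frame in the p2m table first. */
    phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;

    /* 2. Only then queue the m2p update and the reassignment of the old
     *    data page to the receiving domain. */
    mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
    mmu[0].val = __pa(skb->data) >> PAGE_SHIFT;
    mmu[1].ptr = MMU_EXTENDED_COMMAND;
    /* ... reassign-page extended command, issued later as one batch ... */
}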
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
index b2b63441d5..0011273abd 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c
@@ -263,9 +263,9 @@ static void network_alloc_rx_buffers(struct net_device *dev)
rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT;
- /* remove this page from pseudo phys map (migration optimization) */
+ /* Remove this page from pseudo phys map before passing back to Xen. */
phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT]
- = 0x80000001;
+ = INVALID_P2M_ENTRY;
rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
@@ -478,15 +478,6 @@ static int netif_poll(struct net_device *dev, int *pbudget)
mcl->args[2] = 0;
mcl++;
(void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
-
-#if 0
- if (unlikely(rx_mcl[0].args[5] != 0))
- printk(KERN_ALERT"Hypercall0 failed %u\n",np->rx->resp_prod);
-
- if (unlikely(rx_mcl[1].args[5] != 0))
- printk(KERN_ALERT"Hypercall1 failed %u\n",np->rx->resp_prod);
-#endif
-
}
while ( (skb = __skb_dequeue(&rxq)) != NULL )
diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
index 760569f95d..f30bd2b83d 100644
--- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
+++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
@@ -88,30 +88,33 @@ static inline pte_t ptep_get_and_clear(pte_t *xp)
* not have MFN in our p2m table. Conversely, if the page is ours,
* then we'll have p2m(m2p(MFN))==MFN.
* If we detect a special mapping then it doesn't have a 'struct page'.
- * We force !VALID_PAGE() by returning an out-of-range pointer.
+ * We force !pfn_valid() by returning an out-of-range pointer.
+ *
+ * NB. These checks require that, for any MFN that is not in our reservation,
+ * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
 * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
+ * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
+ *
+ * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
+ * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
+ * require. In all the cases we care about, the high bit gets shifted out
+ * (e.g., phys_to_machine()) so behaviour there is correct.
*/
-#define pte_page(_pte) \
-({ \
- unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
- unsigned long pfn = mfn_to_pfn(mfn); \
- if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \
- pfn = max_mapnr; /* special: force !VALID_PAGE() */ \
- pfn_to_page(pfn); \
-})
-
-#define pte_none(x) (!(x).pte_low)
-/* See comments above pte_page */
-/* XXXcl check pte_present because msync.c:filemap_sync_pte calls
- * without pte_present check */
+#define INVALID_P2M_ENTRY (~0UL)
+#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1)))
#define pte_pfn(_pte) \
({ \
unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \
- unsigned long pfn = pte_present(_pte) ? mfn_to_pfn(mfn) : mfn; \
+ unsigned long pfn = mfn_to_pfn(mfn); \
if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
+#define pte_page(_pte) pfn_to_page(pte_pfn(_pte))
+
+#define pte_none(x) (!(x).pte_low)
+
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
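
Note: written out as a predicate, the round-trip check that pte_pfn() now performs (and that pte_page() inherits) looks like this; FOREIGN_FRAME() and INVALID_P2M_ENTRY both exist to make the predicate fail for frames we must not treat as local (helper name is illustrative):

/* Sketch: the ownership test implied by pte_pfn() above. */
static inline int mfn_is_in_our_reservation(unsigned long mfn)
{
    unsigned long pfn = mfn_to_pfn(mfn);        /* m2p lookup */

    /* Ours only if the p2m table maps the PFN straight back to this MFN.
     * A FOREIGN_FRAME()-tagged or INVALID_P2M_ENTRY slot breaks the round
     * trip, so pte_pfn() returns max_mapnr and !pfn_valid() holds. */
    return (pfn < max_mapnr) && (pfn_to_mfn(pfn) == mfn);
}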
diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
index 5a47b30f56..6e4bb55b12 100644
--- a/tools/libxc/xc_linux_save.c
+++ b/tools/libxc/xc_linux_save.c
@@ -295,7 +295,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
int rc = 1, i, j, k, last_iter, iter = 0;
unsigned long mfn;
u32 domid = ioctxt->domain;
- int live = 0; // (ioctxt->flags & XCFLAGS_LIVE);
+ int live = (ioctxt->flags & XCFLAGS_LIVE);
int debug = (ioctxt->flags & XCFLAGS_DEBUG);
int sent_last_iter, skip_this_iter;
@@ -440,7 +440,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
/* Domain is still running at this point */
- if( live ){
+ if( live ){
+printf("GO LIVE!!\n");
if ( xc_shadow_control( xc_handle, domid,
DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
NULL, 0, NULL ) < 0 ) {
diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c
index 362b0f4560..06445943ab 100644
--- a/xen/arch/x86/shadow.c
+++ b/xen/arch/x86/shadow.c
@@ -29,41 +29,6 @@ hypercall lock anyhow (at least initially).
********/
-/**
-
-FIXME:
-
-The shadow table flush command is dangerous on SMP systems as the
-guest may be using the L2 on one CPU while the other is trying to
-blow the table away.
-
-The current save restore code works around this by not calling FLUSH,
-but by calling CLEAN2 which leaves all L2s in tact (this is probably
-quicker anyhow).
-
-Even so, we have to be very careful. The flush code may need to cause
-a TLB flush on another CPU. It needs to do this while holding the
-shadow table lock. The trouble is, the guest may be in the shadow page
-fault handler spinning waiting to grab the shadow lock. It may have
-intterupts disabled, hence we can't use the normal flush_tlb_cpu
-mechanism.
-
-For the moment, we have a grim race whereby the spinlock in the shadow
-fault handler is actually a try lock, in a loop with a helper for the
-tlb flush code.
-
-A better soloution would be to take a new flush lock, then raise a
-per-domain soft irq on the other CPU. The softirq will switch to
-init's PTs, then do an atomic inc of a variable to count himself in,
-then spin on a lock. Having noticed that the other guy has counted
-in, flush the shadow table, then release him by dropping the lock. He
-will then reload cr3 from mm.page_table on the way out of the softirq.
-
-In domian-softirq context we know that the guy holds no locks and has
-interrupts enabled. Nothing can go wrong ;-)
-
-**/
-
static inline void free_shadow_page(struct mm_struct *m,
struct pfn_info *page)
{
@@ -381,9 +346,9 @@ static int shadow_mode_table_op(struct domain *d,
d->mm.shadow_dirty_net_count = 0;
d->mm.shadow_dirty_block_count = 0;
- sc->pages = d->tot_pages;
+ sc->pages = d->max_pages;
- if( d->tot_pages > sc->pages ||
+ if( d->max_pages > sc->pages ||
!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
{
rc = -EINVAL;
@@ -393,10 +358,10 @@ static int shadow_mode_table_op(struct domain *d,
#define chunk (8*1024) // do this in 1KB chunks for L1 cache
- for(i=0;i<d->tot_pages;i+=chunk)
+ for(i=0;i<d->max_pages;i+=chunk)
{
- int bytes = (( ((d->tot_pages-i) > (chunk))?
- (chunk):(d->tot_pages-i) ) + 7) / 8;
+ int bytes = (( ((d->max_pages-i) > (chunk))?
+ (chunk):(d->max_pages-i) ) + 7) / 8;
copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
@@ -428,21 +393,21 @@ static int shadow_mode_table_op(struct domain *d,
sc->stats.dirty_net_count = d->mm.shadow_dirty_net_count;
sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count;
- if( d->tot_pages > sc->pages ||
+ if( d->max_pages > sc->pages ||
!sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap )
{
rc = -EINVAL;
goto out;
}
- sc->pages = d->tot_pages;
+ sc->pages = d->max_pages;
#define chunk (8*1024) // do this in 1KB chunks for L1 cache
- for(i=0;i<d->tot_pages;i+=chunk)
+ for(i=0;i<d->max_pages;i+=chunk)
{
- int bytes = (( ((d->tot_pages-i) > (chunk))?
- (chunk):(d->tot_pages-i) ) + 7) / 8;
+ int bytes = (( ((d->max_pages-i) > (chunk))?
+ (chunk):(d->max_pages-i) ) + 7) / 8;
copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
@@ -475,7 +440,13 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
unsigned int cmd = sc->op;
int rc = 0;
- spin_lock(&d->mm.shadow_lock);
+ if (d == current)
+ printk("Attempt to control your _own_ shadow tables. I hope you know what you're doing!\n");
+
+ domain_pause(d);
+ synchronise_pagetables(d->processor);
+
+ spin_lock(&d->mm.shadow_lock);
if ( cmd == DOM0_SHADOW_CONTROL_OP_OFF )
{
@@ -502,10 +473,10 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
rc = -EINVAL;
}
- flush_tlb_cpu(d->processor);
-
spin_unlock(&d->mm.shadow_lock);
+ domain_unpause(d);
+
return rc;
}
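
Note: with the target domain paused and its pagetables synchronised before the shadow lock is taken, the try-lock/TLB-flush race described in the deleted FIXME block no longer arises, which is also why shadow_fault() below can go back to a plain spin_lock(). Condensed from the hunks above, the control path now has this shape:

/* Sketch: serialisation in shadow_mode_control() after this change. */
int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
{
    int rc = 0;

    domain_pause(d);                        /* stop the guest's VCPU      */
    synchronise_pagetables(d->processor);   /* flush its stale TLB state  */

    spin_lock(&d->mm.shadow_lock);
    /* ... enable / disable / table ops, as before ... */
    spin_unlock(&d->mm.shadow_lock);

    domain_unpause(d);
    return rc;
}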
@@ -518,6 +489,7 @@ static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m)
void unshadow_table( unsigned long gpfn, unsigned int type )
{
unsigned long spfn;
+ struct domain *d = frame_table[gpfn].u.inuse.domain;
SH_VLOG("unshadow_table type=%08x gpfn=%08lx",
type,
@@ -530,11 +502,11 @@ void unshadow_table( unsigned long gpfn, unsigned int type )
// even in the SMP guest case, there won't be a race here as
// this CPU was the one that cmpxchg'ed the page to invalid
- spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask;
+ spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask;
- delete_shadow_status(&current->mm, gpfn);
+ delete_shadow_status(&d->mm, gpfn);
- free_shadow_page( &current->mm, &frame_table[spfn] );
+ free_shadow_page(&d->mm, &frame_table[spfn] );
}
@@ -651,15 +623,7 @@ int shadow_fault( unsigned long va, long error_code )
// take the lock and reread gpte
- while( unlikely(!spin_trylock(&current->mm.shadow_lock)) )
- {
- extern volatile unsigned long flush_cpumask;
- if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) )
- local_flush_tlb();
- rep_nop();
- }
-
- ASSERT(spin_is_locked(&current->mm.shadow_lock));
+ spin_lock(&current->mm.shadow_lock);
if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
{
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index a57d43b9c3..b3b056fd68 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -422,7 +422,7 @@ void free_domheap_pages(struct pfn_info *pg, int order)
drop_dom_ref = (d->xenheap_pages == 0);
spin_unlock_recursive(&d->page_alloc_lock);
}
- else
+ else if ( likely(d != NULL) )
{
/* NB. May recursively lock from domain_relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
@@ -442,6 +442,12 @@ void free_domheap_pages(struct pfn_info *pg, int order)
free_heap_pages(MEMZONE_DOM, pg, order);
}
+ else
+ {
+ /* Freeing an anonymous domain-heap page. */
+ free_heap_pages(MEMZONE_DOM, pg, order);
+ drop_dom_ref = 0;
+ }
if ( drop_dom_ref )
put_domain(d);
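
Note: after this change free_domheap_pages() distinguishes three cases rather than two: Xen-heap pages lent to a domain, ordinary domain-owned pages, and anonymous domain-heap pages with no owning domain, which the old else-branch did not handle separately. A condensed sketch of the resulting structure (page_is_xen_heap() stands in for the real Xen-heap test, and the owned-page accounting is elided):

/* Sketch: case structure of free_domheap_pages() after this change. */
void free_domheap_pages(struct pfn_info *pg, int order)
{
    struct domain *d = pg->u.inuse.domain;
    int drop_dom_ref = 0;

    if ( unlikely(page_is_xen_heap(pg)) )
    {
        /* Xen-heap page lent to a domain: only fix up d->xenheap_pages. */
        spin_lock_recursive(&d->page_alloc_lock);
        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);
        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* Domain-owned page: adjust the owner's accounting (may recursively
         * lock from domain_relinquish_memory()), then free to the heap. */
        free_heap_pages(MEMZONE_DOM, pg, order);
    }
    else
    {
        /* Anonymous domain-heap page: nothing to account, just free it. */
        free_heap_pages(MEMZONE_DOM, pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}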