diff options
author | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2004-08-19 16:08:50 +0000 |
---|---|---|
committer | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2004-08-19 16:08:50 +0000 |
commit | 8db016d517d4f355234c48a3ba230f0c3e287015 (patch) | |
tree | 3c9186fc5c46d90de04e612b2f71fbbffa94e9b9 | |
parent | b0b7188948a2da46932790ada9a8f09626dc948c (diff) | |
parent | 106c7d60ec845cb407381ae25e4d4aad5b26c886 (diff) | |
download | xen-8db016d517d4f355234c48a3ba230f0c3e287015.tar.gz xen-8db016d517d4f355234c48a3ba230f0c3e287015.tar.bz2 xen-8db016d517d4f355234c48a3ba230f0c3e287015.zip |
bitkeeper revision 1.1159.45.1 (4124d092e9_SvIhSMDUvJb9u3drP3A)
Merge ssh://xenbk@gandalf.hpl.hp.com//var/bk/xeno-unstable.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk
-rw-r--r-- | linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c | 45 | ||||
-rw-r--r-- | linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h | 12 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c | 3 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c | 2 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c | 2 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c | 78 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c | 10 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c | 13 | ||||
-rw-r--r-- | linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h | 33 | ||||
-rw-r--r-- | tools/libxc/xc_linux_save.c | 5 | ||||
-rw-r--r-- | xen/arch/x86/shadow.c | 84 | ||||
-rw-r--r-- | xen/common/page_alloc.c | 8 |
12 files changed, 127 insertions, 168 deletions
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c index b72d0efe11..b13e3d75ef 100644 --- a/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/balloon/balloon.c @@ -36,13 +36,16 @@ typedef struct user_balloon_op { } user_balloon_op_t; /* END OF USER DEFINE */ -/* Dead entry written into balloon-owned entries in the PMT. */ -#define DEAD 0xdeadbeef - static struct proc_dir_entry *balloon_pde; unsigned long credit; static unsigned long current_pages, most_seen_pages; +/* + * Dead entry written into balloon-owned entries in the PMT. + * It is deliberately different to INVALID_P2M_ENTRY. + */ +#define DEAD 0xdead1234 + static inline pte_t *get_ptep(unsigned long addr) { pgd_t *pgd; pmd_t *pmd; pte_t *ptep; @@ -79,17 +82,16 @@ static unsigned long inflate_balloon(unsigned long num_pages) for ( i = 0; i < num_pages; i++, currp++ ) { struct page *page = alloc_page(GFP_HIGHUSER); - unsigned long pfn = page - mem_map; + unsigned long pfn = page - mem_map; /* If allocation fails then free all reserved pages. 
*/ - if ( page == 0 ) + if ( page == NULL ) { - printk(KERN_ERR "Unable to inflate balloon by %ld, only %ld pages free.", - num_pages, i); + printk(KERN_ERR "Unable to inflate balloon by %ld, only" + " %ld pages free.", num_pages, i); currp = parray; - for(j = 0; j < i; j++, ++currp){ + for ( j = 0; j < i; j++, currp++ ) __free_page((struct page *) (mem_map + *currp)); - } ret = -EFAULT; goto cleanup; } @@ -102,9 +104,8 @@ static unsigned long inflate_balloon(unsigned long num_pages) { unsigned long mfn = phys_to_machine_mapping[*currp]; curraddr = (unsigned long)page_address(mem_map + *currp); - if (curraddr) + if ( curraddr != 0 ) queue_l1_entry_update(get_ptep(curraddr), 0); - phys_to_machine_mapping[*currp] = DEAD; *currp = mfn; } @@ -313,17 +314,18 @@ claim_new_pages(unsigned long num_pages) XEN_flush_page_update_queue(); new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, parray, num_pages, 0); - if (new_page_cnt != num_pages) + if ( new_page_cnt != num_pages ) { printk(KERN_WARNING "claim_new_pages: xen granted only %lu of %lu requested pages\n", new_page_cnt, num_pages); - /* XXX - * avoid xen lockup when user forgot to setdomainmaxmem. xen - * usually can dribble out a few pages and then hangs + /* + * Avoid xen lockup when user forgot to setdomainmaxmem. Xen + * usually can dribble out a few pages and then hangs. 
*/ - if (new_page_cnt < 1000) { + if ( new_page_cnt < 1000 ) + { printk(KERN_WARNING "Remember to use setdomainmaxmem\n"); HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, parray, new_page_cnt, 0); @@ -331,7 +333,7 @@ claim_new_pages(unsigned long num_pages) } } memcpy(phys_to_machine_mapping+most_seen_pages, parray, - new_page_cnt * sizeof(unsigned long)); + new_page_cnt * sizeof(unsigned long)); pagetable_extend(most_seen_pages,new_page_cnt); @@ -465,12 +467,15 @@ static int __init init_module(void) /* * make a new phys map if mem= says xen can give us memory to grow */ - if (max_pfn > start_info.nr_pages) { + if ( max_pfn > start_info.nr_pages ) + { extern unsigned long *phys_to_machine_mapping; unsigned long *newmap; newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long)); - phys_to_machine_mapping = memcpy(newmap, phys_to_machine_mapping, - start_info.nr_pages * sizeof(unsigned long)); + memset(newmap, ~0, max_pfn * sizeof(unsigned long)); + memcpy(newmap, phys_to_machine_mapping, + start_info.nr_pages * sizeof(unsigned long)); + phys_to_machine_mapping = newmap; } return 0; diff --git a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h index e6845abc86..9ddd30bf73 100644 --- a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h +++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h @@ -58,7 +58,19 @@ static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) * then we'll have p2m(m2p(MFN))==MFN. * If we detect a special mapping then it doesn't have a 'struct page'. * We force !VALID_PAGE() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. 
When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the high bit gets shifted out + * (e.g., phys_to_machine()) so behaviour there is correct. */ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) #define pte_page(_pte) \ ({ \ unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \ diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c index 6f5e1b2c73..46702c5795 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/kernel/pci-dma.c @@ -61,6 +61,8 @@ void *dma_alloc_coherent(struct device *dev, size_t size, pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); pfn = pte->pte_low >> PAGE_SHIFT; queue_l1_entry_update(pte, 0); + phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] = + INVALID_P2M_ENTRY; flush_page_update_queue(); if (HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, &pfn, 1, 0) != 1) BUG(); @@ -79,7 +81,6 @@ void *dma_alloc_coherent(struct device *dev, size_t size, pfn+i, (__pa(ret)>>PAGE_SHIFT)+i); phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] = pfn+i; - flush_page_update_queue(); } flush_page_update_queue(); } diff --git a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c index fc7bc3e523..957555f92a 100644 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -299,7 +299,7 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages) pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); pfn_array[i] = pte->pte_low >> PAGE_SHIFT; queue_l1_entry_update(pte, 0); - phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = 0xdeadbeef; + phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = INVALID_P2M_ENTRY; } 
flush_page_update_queue(); diff --git a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c index f26387f305..9dc64cc0c3 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c @@ -415,7 +415,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) mcl[i].args[3] = blkif->domid; phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = - phys_seg[i].buffer >> PAGE_SHIFT; + FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT); } if ( unlikely(HYPERVISOR_multicall(mcl, nr_psegs) != 0) ) diff --git a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c index 5a3a45873f..e28274a457 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkfront/blkfront.c @@ -1,5 +1,5 @@ /****************************************************************************** - * block.c + * blkfront.c * * XenLinux virtual block-device driver. * @@ -67,11 +67,12 @@ static inline int GET_ID_FROM_FREELIST( void ) { unsigned long free = rec_ring_free; - if(free>BLKIF_RING_SIZE) BUG(); + if ( free > BLKIF_RING_SIZE ) + BUG(); rec_ring_free = rec_ring[free].id; - rec_ring[free].id = 0x0fffffee; // debug + rec_ring[free].id = 0x0fffffee; /* debug */ return free; } @@ -253,8 +254,6 @@ static int blkif_queue_request(struct request *req) id = GET_ID_FROM_FREELIST(); rec_ring[id].id = (unsigned long) req; -//printk(KERN_ALERT"r: %d req %p (%ld)\n",req_prod,req,id); - ring_req->id = id; ring_req->operation = rq_data_dir(req) ? 
BLKIF_OP_WRITE : BLKIF_OP_READ; @@ -300,8 +299,6 @@ void do_blkif_request(request_queue_t *rq) DPRINTK("Entered do_blkif_request\n"); -//printk(KERN_ALERT"r: %d req\n",req_prod); - queued = 0; while ((req = elv_next_request(rq)) != NULL) { @@ -310,7 +307,8 @@ void do_blkif_request(request_queue_t *rq) continue; } - if (BLKIF_RING_FULL) { + if ( BLKIF_RING_FULL ) + { blk_stop_queue(rq); break; } @@ -358,11 +356,9 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) id = bret->id; req = (struct request *)rec_ring[id].id; -//printk(KERN_ALERT"i: %d req %p (%ld)\n",i,req,id); - blkif_completion( &rec_ring[id] ); - ADD_ID_TO_FREELIST(id); // overwrites req + ADD_ID_TO_FREELIST(id); /* overwrites req */ switch ( bret->operation ) { @@ -772,8 +768,6 @@ static int blkif_queue_request(unsigned long id, req->nr_segments = 1; req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect; -//printk("N: %d req %p (%ld)\n",req_prod,rec_ring[xid].id,xid); - req_prod++; /* Keep a private copy so we can reissue requests when recovering. 
*/ @@ -892,8 +886,6 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) id = bret->id; bh = (struct buffer_head *)rec_ring[id].id; -//printk("i: %d req %p (%ld)\n",i,bh,id); - blkif_completion( &rec_ring[id] ); ADD_ID_TO_FREELIST(id); @@ -942,16 +934,11 @@ static inline void translate_req_to_pfn(blkif_request_t *xreq, xreq->operation = req->operation; xreq->nr_segments = req->nr_segments; xreq->device = req->device; - // preserve id + /* preserve id */ xreq->sector_number = req->sector_number; for ( i = 0; i < req->nr_segments; i++ ) - { - xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | - (machine_to_phys_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << - PAGE_SHIFT); - } - + xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]); } static inline void translate_req_to_mfn(blkif_request_t *xreq, @@ -962,15 +949,11 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq, xreq->operation = req->operation; xreq->nr_segments = req->nr_segments; xreq->device = req->device; - xreq->id = req->id; // copy id (unlike above) + xreq->id = req->id; /* copy id (unlike above) */ xreq->sector_number = req->sector_number; for ( i = 0; i < req->nr_segments; i++ ) - { - xreq->frame_and_sects[i] = (req->frame_and_sects[i] & ~PAGE_MASK) | - (phys_to_machine_mapping[req->frame_and_sects[i] >> PAGE_SHIFT] << - PAGE_SHIFT); - } + xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]); } @@ -978,7 +961,6 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq, static inline void flush_requests(void) { DISABLE_SCATTERGATHER(); -//printk(KERN_ALERT"flush %d\n",req_prod); wmb(); /* Ensure that the frontend can see the requests. 
*/ blk_ring->req_prod = req_prod; notify_via_evtchn(blkif_evtchn); @@ -1010,8 +992,6 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id; rec_ring[id].id = (unsigned long) req; -//printk("c: %d req %p (%ld)\n",req_prod,req,id); - translate_req_to_pfn( &rec_ring[id], req ); req_prod++; @@ -1094,13 +1074,13 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status) " in state %d\n", blkif_state); break; } + blkif_evtchn = status->evtchn; - blkif_irq = bind_evtchn_to_irq(blkif_evtchn); - if ( (rc=request_irq(blkif_irq, blkif_int, - SA_SAMPLE_RANDOM, "blkif", NULL)) ) - { + blkif_irq = bind_evtchn_to_irq(blkif_evtchn); + + if ( (rc = request_irq(blkif_irq, blkif_int, + SA_SAMPLE_RANDOM, "blkif", NULL)) ) printk(KERN_ALERT"blkfront request_irq failed (%ld)\n",rc); - } if ( recovery ) { @@ -1109,31 +1089,28 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status) /* Hmm, requests might be re-ordered when we re-issue them. This will need to be fixed once we have barriers */ - // req_prod = 0; : already is zero - - // stage 1 : find active and move to safety - for ( i=0; i <BLKIF_RING_SIZE; i++ ) + /* Stage 1 : Find active and move to safety. */ + for ( i = 0; i < BLKIF_RING_SIZE; i++ ) { if ( rec_ring[i].id >= PAGE_OFFSET ) { translate_req_to_mfn( - &blk_ring->ring[req_prod].req, &rec_ring[i] ); - + &blk_ring->ring[req_prod].req, &rec_ring[i]); req_prod++; } } -printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod); + printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod); - // stage 2 : set up shadow list - for ( i=0; i<req_prod; i++ ) + /* Stage 2 : Set up shadow list. 
*/ + for ( i = 0; i < req_prod; i++ ) { rec_ring[i].id = blk_ring->ring[i].req.id; blk_ring->ring[i].req.id = i; - translate_req_to_pfn( &rec_ring[i], &blk_ring->ring[i].req ); + translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req); } - // stage 3 : set up free list + /* Stage 3 : Set up free list. */ for ( ; i < BLKIF_RING_SIZE; i++ ) rec_ring[i].id = i+1; rec_ring_free = req_prod; @@ -1150,9 +1127,6 @@ printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod); /* Kicks things back into life. */ flush_requests(); - - - } else { @@ -1270,7 +1244,7 @@ void blkdev_resume(void) /* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */ -void blkif_completion( blkif_request_t *req ) +void blkif_completion(blkif_request_t *req) { int i; @@ -1281,10 +1255,8 @@ void blkif_completion( blkif_request_t *req ) { unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT; unsigned long mfn = phys_to_machine_mapping[pfn]; - queue_machphys_update(mfn, pfn); } - break; } diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c index 23b0f87130..009012c9f6 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c +++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c @@ -204,6 +204,12 @@ static void net_rx_action(unsigned long unused) mdata = virt_to_machine(vdata); new_mfn = get_new_mfn(); + /* + * Set the new P2M table entry before reassigning the old data page. + * Heed the comment in pgtable-2level.h:pte_page(). 
:-) + */ + phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn; + mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu[0].val = __pa(vdata) >> PAGE_SHIFT; mmu[1].ptr = MMU_EXTENDED_COMMAND; @@ -250,8 +256,6 @@ static void net_rx_action(unsigned long unused) mdata = ((mmu[2].ptr & PAGE_MASK) | ((unsigned long)skb->data & ~PAGE_MASK)); - phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn; - atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->frag_list = NULL; @@ -556,7 +560,7 @@ static void net_tx_action(unsigned long unused) } phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] = - txreq.addr >> PAGE_SHIFT; + FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT); __skb_put(skb, PKT_PROT_LEN); memcpy(skb->data, diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c index b2b63441d5..0011273abd 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c +++ b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c @@ -263,9 +263,9 @@ static void network_alloc_rx_buffers(struct net_device *dev) rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT; - /* remove this page from pseudo phys map (migration optimization) */ + /* Remove this page from pseudo phys map before passing back to Xen. 
*/ phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] - = 0x80000001; + = INVALID_P2M_ENTRY; rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping; rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT; @@ -478,15 +478,6 @@ static int netif_poll(struct net_device *dev, int *pbudget) mcl->args[2] = 0; mcl++; (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); - -#if 0 - if (unlikely(rx_mcl[0].args[5] != 0)) - printk(KERN_ALERT"Hypercall0 failed %u\n",np->rx->resp_prod); - - if (unlikely(rx_mcl[1].args[5] != 0)) - printk(KERN_ALERT"Hypercall1 failed %u\n",np->rx->resp_prod); -#endif - } while ( (skb = __skb_dequeue(&rxq)) != NULL ) diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h index 760569f95d..f30bd2b83d 100644 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h @@ -88,30 +88,33 @@ static inline pte_t ptep_get_and_clear(pte_t *xp) * not have MFN in our p2m table. Conversely, if the page is ours, * then we'll have p2m(m2p(MFN))==MFN. * If we detect a special mapping then it doesn't have a 'struct page'. - * We force !VALID_PAGE() by returning an out-of-range pointer. + * We force !pfn_valid() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the high bit gets shifted out + * (e.g., phys_to_machine()) so behaviour there is correct. 
*/ -#define pte_page(_pte) \ -({ \ - unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \ - unsigned long pfn = mfn_to_pfn(mfn); \ - if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \ - pfn = max_mapnr; /* special: force !VALID_PAGE() */ \ - pfn_to_page(pfn); \ -}) - -#define pte_none(x) (!(x).pte_low) -/* See comments above pte_page */ -/* XXXcl check pte_present because msync.c:filemap_sync_pte calls - * without pte_present check */ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) #define pte_pfn(_pte) \ ({ \ unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \ - unsigned long pfn = pte_present(_pte) ? mfn_to_pfn(mfn) : mfn; \ + unsigned long pfn = mfn_to_pfn(mfn); \ if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \ pfn = max_mapnr; /* special: force !pfn_valid() */ \ pfn; \ }) +#define pte_page(_pte) pfn_to_page(pte_pfn(_pte)) + +#define pte_none(x) (!(x).pte_low) + #define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c index 5a47b30f56..6e4bb55b12 100644 --- a/tools/libxc/xc_linux_save.c +++ b/tools/libxc/xc_linux_save.c @@ -295,7 +295,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) int rc = 1, i, j, k, last_iter, iter = 0; unsigned long mfn; u32 domid = ioctxt->domain; - int live = 0; // (ioctxt->flags & XCFLAGS_LIVE); + int live = (ioctxt->flags & XCFLAGS_LIVE); int debug = (ioctxt->flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; @@ -440,7 +440,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) /* Domain is still running at this point */ - if( live ){ + if( live ){ +printf("GO LIVE!!\n"); if ( xc_shadow_control( xc_handle, domid, DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, NULL, 0, NULL ) < 0 ) { diff --git 
a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c index 362b0f4560..06445943ab 100644 --- a/xen/arch/x86/shadow.c +++ b/xen/arch/x86/shadow.c @@ -29,41 +29,6 @@ hypercall lock anyhow (at least initially). ********/ -/** - -FIXME: - -The shadow table flush command is dangerous on SMP systems as the -guest may be using the L2 on one CPU while the other is trying to -blow the table away. - -The current save restore code works around this by not calling FLUSH, -but by calling CLEAN2 which leaves all L2s in tact (this is probably -quicker anyhow). - -Even so, we have to be very careful. The flush code may need to cause -a TLB flush on another CPU. It needs to do this while holding the -shadow table lock. The trouble is, the guest may be in the shadow page -fault handler spinning waiting to grab the shadow lock. It may have -intterupts disabled, hence we can't use the normal flush_tlb_cpu -mechanism. - -For the moment, we have a grim race whereby the spinlock in the shadow -fault handler is actually a try lock, in a loop with a helper for the -tlb flush code. - -A better soloution would be to take a new flush lock, then raise a -per-domain soft irq on the other CPU. The softirq will switch to -init's PTs, then do an atomic inc of a variable to count himself in, -then spin on a lock. Having noticed that the other guy has counted -in, flush the shadow table, then release him by dropping the lock. He -will then reload cr3 from mm.page_table on the way out of the softirq. - -In domian-softirq context we know that the guy holds no locks and has -interrupts enabled. 
Nothing can go wrong ;-) - -**/ - static inline void free_shadow_page(struct mm_struct *m, struct pfn_info *page) { @@ -381,9 +346,9 @@ static int shadow_mode_table_op(struct domain *d, d->mm.shadow_dirty_net_count = 0; d->mm.shadow_dirty_block_count = 0; - sc->pages = d->tot_pages; + sc->pages = d->max_pages; - if( d->tot_pages > sc->pages || + if( d->max_pages > sc->pages || !sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap ) { rc = -EINVAL; @@ -393,10 +358,10 @@ static int shadow_mode_table_op(struct domain *d, #define chunk (8*1024) // do this in 1KB chunks for L1 cache - for(i=0;i<d->tot_pages;i+=chunk) + for(i=0;i<d->max_pages;i+=chunk) { - int bytes = (( ((d->tot_pages-i) > (chunk))? - (chunk):(d->tot_pages-i) ) + 7) / 8; + int bytes = (( ((d->max_pages-i) > (chunk))? + (chunk):(d->max_pages-i) ) + 7) / 8; copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))), d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))), @@ -428,21 +393,21 @@ static int shadow_mode_table_op(struct domain *d, sc->stats.dirty_net_count = d->mm.shadow_dirty_net_count; sc->stats.dirty_block_count = d->mm.shadow_dirty_block_count; - if( d->tot_pages > sc->pages || + if( d->max_pages > sc->pages || !sc->dirty_bitmap || !d->mm.shadow_dirty_bitmap ) { rc = -EINVAL; goto out; } - sc->pages = d->tot_pages; + sc->pages = d->max_pages; #define chunk (8*1024) // do this in 1KB chunks for L1 cache - for(i=0;i<d->tot_pages;i+=chunk) + for(i=0;i<d->max_pages;i+=chunk) { - int bytes = (( ((d->tot_pages-i) > (chunk))? - (chunk):(d->tot_pages-i) ) + 7) / 8; + int bytes = (( ((d->max_pages-i) > (chunk))? 
+ (chunk):(d->max_pages-i) ) + 7) / 8; copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))), d->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))), @@ -475,7 +440,13 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc) unsigned int cmd = sc->op; int rc = 0; - spin_lock(&d->mm.shadow_lock); + if (d == current) + printk("Attempt to control your _own_ shadow tables. I hope you know what you're doing!\n"); + + domain_pause(d); + synchronise_pagetables(d->processor); + + spin_lock(&d->mm.shadow_lock); if ( cmd == DOM0_SHADOW_CONTROL_OP_OFF ) { @@ -502,10 +473,10 @@ int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc) rc = -EINVAL; } - flush_tlb_cpu(d->processor); - spin_unlock(&d->mm.shadow_lock); + domain_unpause(d); + return rc; } @@ -518,6 +489,7 @@ static inline struct pfn_info *alloc_shadow_page(struct mm_struct *m) void unshadow_table( unsigned long gpfn, unsigned int type ) { unsigned long spfn; + struct domain *d = frame_table[gpfn].u.inuse.domain; SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, @@ -530,11 +502,11 @@ void unshadow_table( unsigned long gpfn, unsigned int type ) // even in the SMP guest case, there won't be a race here as // this CPU was the one that cmpxchg'ed the page to invalid - spfn = __shadow_status(&current->mm, gpfn) & PSH_pfn_mask; + spfn = __shadow_status(&d->mm, gpfn) & PSH_pfn_mask; - delete_shadow_status(&current->mm, gpfn); + delete_shadow_status(&d->mm, gpfn); - free_shadow_page( &current->mm, &frame_table[spfn] ); + free_shadow_page(&d->mm, &frame_table[spfn] ); } @@ -651,15 +623,7 @@ int shadow_fault( unsigned long va, long error_code ) // take the lock and reread gpte - while( unlikely(!spin_trylock(&current->mm.shadow_lock)) ) - { - extern volatile unsigned long flush_cpumask; - if ( test_and_clear_bit(smp_processor_id(), &flush_cpumask) ) - local_flush_tlb(); - rep_nop(); - } - - ASSERT(spin_is_locked(&current->mm.shadow_lock)); + spin_lock(&current->mm.shadow_lock); if ( unlikely(__get_user(gpte, (unsigned 
long*)&linear_pg_table[va>>PAGE_SHIFT])) ) { diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index a57d43b9c3..b3b056fd68 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -422,7 +422,7 @@ void free_domheap_pages(struct pfn_info *pg, int order) drop_dom_ref = (d->xenheap_pages == 0); spin_unlock_recursive(&d->page_alloc_lock); } - else + else if ( likely(d != NULL) ) { /* NB. May recursively lock from domain_relinquish_memory(). */ spin_lock_recursive(&d->page_alloc_lock); @@ -442,6 +442,12 @@ void free_domheap_pages(struct pfn_info *pg, int order) free_heap_pages(MEMZONE_DOM, pg, order); } + else + { + /* Freeing an anonymous domain-heap page. */ + free_heap_pages(MEMZONE_DOM, pg, order); + drop_dom_ref = 0; + } if ( drop_dom_ref ) put_domain(d); |