author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2004-05-07 14:53:28 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2004-05-07 14:53:28 +0000
commit     1de448f4c54eac94a966d65e72b15bcbef3a7e5d
tree       cc2a452db9507208580071b8676288d9b1dde5ea /xenolinux-2.4.26-sparse/arch
parent     747a8d04495070f12d625e2047b07eb3967ca9b8
bitkeeper revision 1.891.1.5 (409ba2e8A6F60eP06BqyZUGapsn8XA)
Network interface for new IO model is now completed.
Diffstat (limited to 'xenolinux-2.4.26-sparse/arch')
17 files changed, 395 insertions, 292 deletions
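The changeset retires the old netop hypercall interface in favour of shared-memory request/response rings kicked through event channels. As an illustrative sketch only, simplified from network_start_xmit() in the patched frontend below: netif_tx_interface_t, netif_tx_request_t, NETIF_RING_IDX, MASK_NET_TX_IDX() and notify_via_evtchn() are the patch's own names, while the free-standing helper is invented for illustration.

    /* Sketch: queue one packet on the shared tx ring and kick the backend. */
    static void example_queue_tx(netif_tx_interface_t *tx_if,
                                 unsigned long mach_addr, u16 size, u16 id,
                                 unsigned int evtchn)
    {
        NETIF_RING_IDX i = tx_if->req_prod;
        netif_tx_request_t *tx = &tx_if->ring[MASK_NET_TX_IDX(i)].req;

        tx->id   = id;         /* private value, echoed in the response */
        tx->addr = mach_addr;  /* machine address of the frame data */
        tx->size = size;

        wmb();                 /* publish the request before the index */
        tx_if->req_prod = i + 1;

        /* Notify Xen only if the backend has consumed all prior responses. */
        if ( tx_if->resp_prod == i )
            notify_via_evtchn(evtchn);
    }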
diff --git a/xenolinux-2.4.26-sparse/arch/xen/config.in b/xenolinux-2.4.26-sparse/arch/xen/config.in
index 16fa5e66d4..7f961d8521 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/config.in
+++ b/xenolinux-2.4.26-sparse/arch/xen/config.in
@@ -101,6 +101,8 @@
 if [ "$CONFIG_HIGHMEM" = "y" ]; then
    bool 'HIGHMEM I/O support' CONFIG_HIGHIO
 fi
+define_int CONFIG_FORCE_MAX_ZONEORDER 12
+
 #bool 'Symmetric multi-processing support' CONFIG_SMP
 #if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
 #   define_bool CONFIG_HAVE_DEC_LOCK y
diff --git a/xenolinux-2.4.26-sparse/arch/xen/defconfig b/xenolinux-2.4.26-sparse/arch/xen/defconfig
index eaa9171b1f..013e732c3f 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/defconfig
+++ b/xenolinux-2.4.26-sparse/arch/xen/defconfig
@@ -50,6 +50,7 @@ CONFIG_X86_TSC=y
 CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=12
 
 #
 # General setup
@@ -156,6 +157,7 @@ CONFIG_IP_NF_TARGET_ULOG=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
+CONFIG_NETDEVICES=y
 
 #
 # Block devices
diff --git a/xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev b/xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev
index 41b05aaaa7..3be5b50bfa 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev
+++ b/xenolinux-2.4.26-sparse/arch/xen/defconfig-physdev
@@ -51,6 +51,7 @@ CONFIG_X86_TSC=y
 CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
+CONFIG_FORCE_MAX_ZONEORDER=12
 
 #
 # General setup
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
index e6004b4a8e..e80435fbbb 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h
@@ -10,6 +10,7 @@
 #include <linux/rbtree.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include <asm/ctrl_if.h>
 #include <asm/io.h>
 #include "../blkif.h"
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
index 0746ecfab0..0b26224651 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c
@@ -74,7 +74,8 @@ void blkif_ctrlif_init(void)
     ctrl_msg_t                       cmsg;
     blkif_be_driver_status_changed_t st;
 
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_BLKIF_BE;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
index 9acbac35ab..14a6ab324d 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c
@@ -70,7 +70,7 @@ void blkif_create(blkif_be_create_t *create)
     unsigned int   handle = create->blkif_handle;
     blkif_t      **pblkif, *blkif;
 
-    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL )
+    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
     {
         DPRINTK("Could not create blkif: out of memory\n");
         create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
index 4b11ad9a8e..eb3e32c75f 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c
@@ -24,17 +24,15 @@
 #define MAX_PENDING_REQS 64
 #define BATCH_PER_DOMAIN 16
 
-static struct vm_struct *mmap_vma;
-#define MMAP_PAGES_PER_SEGMENT \
-    ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1)
+static unsigned long mmap_vstart;
 #define MMAP_PAGES_PER_REQUEST \
-    (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT)
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
 #define MMAP_PAGES             \
     (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
-#define MMAP_VADDR(_req,_seg)                        \
-    ((unsigned long)mmap_vma->addr +                 \
+#define MMAP_VADDR(_req,_seg)                        \
+    (mmap_vstart +                                   \
      ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE))
+     ((_seg) * PAGE_SIZE))
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a
@@ -259,11 +257,13 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
     prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
     for ( i = 0; i < req->nr_segments; i++ )
     {
-        if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
+        /* Make sure the buffer is page-sized. */
+        if ( (blkif_first_sect(req->frame_and_sects[i]) != 0) ||
+             (blkif_last_sect(req->frame_and_sects[i]) != 7) )
             goto bad_descriptor;
         rc = direct_remap_area_pages(&init_mm,
                                      MMAP_VADDR(pending_idx, i),
-                                     req->buffer_and_sects[i] & PAGE_MASK,
+                                     req->frame_and_sects[i] & PAGE_MASK,
                                      PAGE_SIZE, prot, blkif->domid);
         if ( rc != 0 )
             goto bad_descriptor;
@@ -288,15 +288,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
     extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
     struct buffer_head *bh;
     int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
-    unsigned short nr_sects;
-    unsigned long buffer;
+    short nr_sects;
+    unsigned long buffer, fas;
     int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
     pending_req_t *pending_req;
     pgprot_t prot;
 
     /* We map virtual scatter/gather segments to physical segments. */
     int new_segs, nr_psegs = 0;
-    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
+    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST + 1];
 
     /* Check that number of segments is sane. */
     if ( unlikely(req->nr_segments == 0) ||
@@ -314,17 +314,12 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
      */
     for ( i = tot_sects = 0; i < req->nr_segments; i++, tot_sects += nr_sects )
    {
-        buffer   = req->buffer_and_sects[i] & ~0x1FF;
-        nr_sects = req->buffer_and_sects[i] & 0x1FF;
+        fas      = req->frame_and_sects[i];
+        buffer   = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
+        nr_sects = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
 
-        if ( unlikely(nr_sects == 0) )
-            continue;
-
-        if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) )
-        {
-            DPRINTK("Too many sectors in segment\n");
+        if ( nr_sects <= 0 )
             goto bad_descriptor;
-        }
 
         phys_seg[nr_psegs].dev           = req->device;
         phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
@@ -344,7 +339,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
         }
 
         nr_psegs += new_segs;
-        ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2);
+        ASSERT(nr_psegs <= (BLKIF_MAX_SEGMENTS_PER_REQUEST+1));
     }
 
     /* Nonsensical zero-sized request? */
@@ -358,13 +353,10 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
 
     for ( i = 0; i < nr_psegs; i++ )
     {
-        unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) +
-                            (phys_seg[i].nr_sects << 9) +
-                            (PAGE_SIZE - 1)) & PAGE_MASK;
         int rc = direct_remap_area_pages(&init_mm,
                                          MMAP_VADDR(pending_idx, i),
                                          phys_seg[i].buffer & PAGE_MASK,
-                                         sz, prot, blkif->domid);
+                                         PAGE_SIZE, prot, blkif->domid);
         if ( rc != 0 )
         {
             DPRINTK("invalid buffer\n");
@@ -372,6 +364,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
                              MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
             goto bad_descriptor;
         }
+        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+            phys_seg[i].buffer >> PAGE_SHIFT;
     }
 
     pending_req = &pending_reqs[pending_idx];
@@ -399,6 +393,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
         bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
         bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
             (phys_seg[i].buffer & ~PAGE_MASK);
+//      bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
         bh->b_end_io        = end_block_io_op;
         bh->b_private       = pending_req;
@@ -456,13 +451,13 @@ static int __init init_module(void)
 {
     int i;
 
+    if ( !(start_info.flags & SIF_INITDOMAIN) )
+        return 0;
+
     blkif_interface_init();
 
-    if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n");
-        return -ENOMEM;
-    }
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 )
+        BUG();
 
     pending_cons = 0;
     pending_prod = MAX_PENDING_REQS;
@@ -484,6 +479,7 @@ static int __init init_module(void)
 
 static void cleanup_module(void)
 {
+    BUG();
 }
 
 module_init(init_module);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
index 19b0b3015d..bb5b6ea743 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
@@ -47,7 +47,7 @@ void vbd_create(blkif_be_vbd_create_t *create)
         }
     }
 
-    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
+    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
     {
         DPRINTK("vbd_create: out of memory\n");
         create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
@@ -111,7 +111,7 @@ void vbd_grow(blkif_be_vbd_grow_t *grow)
     }
 
     if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t),
-                               GFP_ATOMIC)) == NULL) )
+                               GFP_KERNEL)) == NULL) )
    {
         DPRINTK("vbd_grow: out of memory\n");
memory\n"); grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h index 1938f68f8e..0a90744c59 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h @@ -26,19 +26,22 @@ */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 -#define BLKIF_MAX_SECTORS_PER_SEGMENT 16 - typedef struct { u8 operation; /* BLKIF_OP_??? */ u8 nr_segments; /* number of segments */ blkif_vdev_t device; /* only for read/write requests */ unsigned long id; /* private guest value, echoed in resp */ blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */ - /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */ - /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */ - unsigned long buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect ; @f_a_s[:12]=frame. */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + /* @frame: machine page frame number. */ + unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } blkif_request_t; +#define blkif_first_sect(_fas) (((_fas)>>3)&7) +#define blkif_last_sect(_fas) ((_fas)&7) + typedef struct { unsigned long id; /* copied from request */ u8 operation; /* copied from request */ @@ -79,8 +82,8 @@ typedef struct { * @device == unused (zero) * @id == any value (echoed in response message) * @sector_num == unused (zero) - * @buffer_and_sects == list of page-aligned, page-sized buffers. - * (i.e., nr_sects == 8). + * @frame_and_sects == list of page-sized buffers. + * (i.e., @first_sect == 0, @last_sect == 7). * * The response is a list of vdisk_t elements copied into the out-of-band * probe buffer. On success the response status field contains the number diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c index 29cc01d087..63f1aeea26 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c @@ -24,8 +24,6 @@ typedef unsigned char byte; /* from linux/ide.h */ static unsigned int blkif_state = BLKIF_STATE_CLOSED; static unsigned int blkif_evtchn, blkif_irq; -static struct tq_struct blkif_statechange_tq; - static int blkif_control_rsp_valid; static blkif_response_t blkif_control_rsp; @@ -302,11 +300,18 @@ static int blkif_queue_request(unsigned long id, struct gendisk *gd; blkif_request_t *req; struct buffer_head *bh; + unsigned int fsect, lsect; - if ( unlikely(nr_sectors >= (1<<9)) ) - BUG(); + fsect = (buffer_ma & ~PAGE_MASK) >> 9; + lsect = fsect + nr_sectors - 1; + + /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. 
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
index 29cc01d087..63f1aeea26 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
@@ -24,8 +24,6 @@ typedef unsigned char byte; /* from linux/ide.h */
 static unsigned int blkif_state = BLKIF_STATE_CLOSED;
 static unsigned int blkif_evtchn, blkif_irq;
 
-static struct tq_struct blkif_statechange_tq;
-
 static int blkif_control_rsp_valid;
 static blkif_response_t blkif_control_rsp;
 
@@ -302,11 +300,18 @@ static int blkif_queue_request(unsigned long id,
     struct gendisk     *gd;
     blkif_request_t    *req;
     struct buffer_head *bh;
+    unsigned int        fsect, lsect;
 
-    if ( unlikely(nr_sectors >= (1<<9)) )
-        BUG();
+    fsect = (buffer_ma & ~PAGE_MASK) >> 9;
+    lsect = fsect + nr_sectors - 1;
+
+    /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
     if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
         BUG();
+    if ( lsect > 7 )
+        BUG();
+
+    buffer_ma &= PAGE_MASK;
 
     if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
         return 1;
@@ -341,8 +346,9 @@ static int blkif_queue_request(unsigned long id,
         bh = (struct buffer_head *)id;
         bh->b_reqnext = (struct buffer_head *)req->id;
         req->id = id;
-        req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
-        if ( ++req->nr_segments < MAX_BLK_SEGS )
+        req->frame_and_sects[req->nr_segments] =
+            buffer_ma | (fsect<<3) | lsect;
+        if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
             sg_next_sect += nr_sectors;
         else
             DISABLE_SCATTERGATHER();
@@ -371,7 +377,7 @@ static int blkif_queue_request(unsigned long id,
     req->sector_number = (blkif_sector_t)sector_number;
     req->device        = device;
     req->nr_segments   = 1;
-    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
+    req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
     req_prod++;
 
     return 0;
@@ -556,46 +562,11 @@ void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
 }
 
 
-static void blkif_bringup_phase1(void *unused)
+static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
 {
     ctrl_msg_t                   cmsg;
     blkif_fe_interface_connect_t up;
 
-    /* Move from CLOSED to DISCONNECTED state. */
-    blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
-    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
-    blkif_state = BLKIF_STATE_DISCONNECTED;
-
-    /* Construct an interface-CONNECT message for the domain controller. */
-    cmsg.type      = CMSG_BLKIF_FE;
-    cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
-    cmsg.length    = sizeof(blkif_fe_interface_connect_t);
-    up.handle      = 0;
-    up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
-    memcpy(cmsg.msg, &up, sizeof(up));
-
-    /* Tell the controller to bring up the interface. */
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void blkif_bringup_phase2(void *unused)
-{
-    blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
-    (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
-
-    /* Probe for discs that are attached to the interface. */
-    xlvbd_init();
-
-    blkif_state = BLKIF_STATE_CONNECTED;
-
-    /* Kick pending requests. */
-    spin_lock_irq(&io_request_lock);
-    kick_pending_request_queues();
-    spin_unlock_irq(&io_request_lock);
-}
-
-static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
-{
     if ( status->handle != 0 )
     {
         printk(KERN_WARNING "Status change on unsupported blkif %d\n",
@@ -617,8 +588,22 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
                    " in state %d\n", blkif_state);
             break;
         }
-        blkif_statechange_tq.routine = blkif_bringup_phase1;
-        schedule_task(&blkif_statechange_tq);
+
+        /* Move from CLOSED to DISCONNECTED state. */
+        blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
+        blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
+        blkif_state = BLKIF_STATE_DISCONNECTED;
+
+        /* Construct an interface-CONNECT message for the domain controller. */
+        cmsg.type      = CMSG_BLKIF_FE;
+        cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
+        cmsg.length    = sizeof(blkif_fe_interface_connect_t);
+        up.handle      = 0;
+        up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
+        memcpy(cmsg.msg, &up, sizeof(up));
+
+        /* Tell the controller to bring up the interface. */
+        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
         break;
 
     case BLKIF_INTERFACE_STATUS_CONNECTED:
@@ -628,9 +613,20 @@ static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
                    " in state %d\n", blkif_state);
             break;
         }
+
         blkif_evtchn = status->evtchn;
-        blkif_statechange_tq.routine = blkif_bringup_phase2;
-        schedule_task(&blkif_statechange_tq);
+        blkif_irq    = bind_evtchn_to_irq(blkif_evtchn);
+        (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
+
+        /* Probe for discs that are attached to the interface. */
+        xlvbd_init();
+
+        blkif_state = BLKIF_STATE_CONNECTED;
+
+        /* Kick pending requests. */
+        spin_lock_irq(&io_request_lock);
+        kick_pending_request_queues();
+        spin_unlock_irq(&io_request_lock);
         break;
 
     default:
@@ -675,7 +671,11 @@ int __init xlblk_init(void)
     ctrl_msg_t                       cmsg;
     blkif_fe_driver_status_changed_t st;
 
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx);
+    if ( start_info.flags & SIF_INITDOMAIN )
+        return 0;
+
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_BLKIF_FE;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
index b26907192a..12ce976cb5 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -67,7 +67,7 @@ static int xlvbd_get_vbd_info(vdisk_t *disk_info)
     memset(&req, 0, sizeof(req));
     req.operation   = BLKIF_OP_PROBE;
     req.nr_segments = 1;
-    req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512);
+    req.frame_and_sects[0] = virt_to_machine(buf) | 7;
 
     blkif_control_send(&req, &rsp);
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/console/console.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/console/console.c
index e01896385b..244f309467 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/console/console.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/console/console.c
@@ -513,7 +513,7 @@ static int __init xencons_init(void)
     }
     else
     {
-        (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx);
+        (void)ctrl_if_register_receiver(CMSG_CONSOLE, xencons_rx, 0);
     }
 
     printk("Xen virtual console successfully installed\n");
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/control.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/control.c
index e0e43ff2cc..cf1b075031 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/control.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/control.c
@@ -10,8 +10,6 @@
 
 static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
 {
-    DPRINTK("Received netif backend message, subtype=%d\n", msg->subtype);
-
     switch ( msg->subtype )
     {
     case CMSG_NETIF_BE_CREATE:
@@ -54,7 +52,8 @@ void netif_ctrlif_init(void)
     ctrl_msg_t                       cmsg;
     netif_be_driver_status_changed_t st;
 
-    (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx);
+    (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_NETIF_BE;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c
index 8623d8214b..b6a9cff692 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/interface.c
@@ -7,6 +7,7 @@
  */
 
 #include "common.h"
+#include <linux/rtnetlink.h>
 
 #define NETIF_HASHSZ 1024
 #define NETIF_HASH(_d,_h) \
@@ -14,6 +15,7 @@
 
 static netif_t *netif_hash[NETIF_HASHSZ];
 static struct net_device *bridge_dev;
+static struct net_bridge *bridge_br;
 
 netif_t *netif_find_by_handle(domid_t domid, unsigned int handle)
 {
@@ -36,8 +38,10 @@ void __netif_disconnect_complete(netif_t *netif)
      */
     unbind_evtchn_from_irq(netif->evtchn);
     vfree(netif->tx); /* Frees netif->rx as well. */
-    (void)br_del_if((struct net_bridge *)bridge_dev->priv, netif->dev);
+    rtnl_lock();
+    (void)br_del_if(bridge_br, netif->dev);
     (void)dev_close(netif->dev);
+    rtnl_unlock();
 
     /* Construct the deferred response message. */
     cmsg.type      = CMSG_NETIF_BE;
@@ -73,7 +77,7 @@ void netif_create(netif_be_create_t *create)
     struct net_device *dev;
     netif_t          **pnetif, *netif;
 
-    dev = alloc_netdev(sizeof(netif_t), "netif-be-%d", ether_setup);
+    dev = alloc_netdev(sizeof(netif_t), "nbe-if%d", ether_setup);
     if ( dev == NULL )
     {
         DPRINTK("Could not create netif: out of memory\n");
@@ -111,7 +115,10 @@ void netif_create(netif_be_create_t *create)
     dev->hard_start_xmit = netif_be_start_xmit;
     dev->get_stats       = netif_be_get_stats;
     memcpy(dev->dev_addr, create->mac, ETH_ALEN);
-
+
+    /* XXX In bridge mode we should force a different MAC from remote end. */
+    dev->dev_addr[2] ^= 1;
+
     if ( register_netdev(dev) != 0 )
     {
         DPRINTK("Could not register new net device\n");
@@ -225,15 +232,27 @@ void netif_connect(netif_be_connect_t *connect)
     netif->status = CONNECTED;
     netif_get(netif);
 
+    rtnl_lock();
+
     (void)dev_open(netif->dev);
-    (void)br_add_if((struct net_bridge *)bridge_dev->priv, netif->dev);
-    /* At this point we try to ensure that eth0 is attached to the bridge. */
+    (void)br_add_if(bridge_br, netif->dev);
+
+    /*
+     * The default config is a very simple binding to eth0.
+     * If eth0 is being used as an IP interface by this OS then someone
+     * must add eth0's IP address to nbe-br, and change the routing table
+     * to refer to nbe-br instead of eth0.
+     */
+    (void)dev_open(bridge_dev);
+
     if ( (eth0_dev = __dev_get_by_name("eth0")) != NULL )
     {
         (void)dev_open(eth0_dev);
-        (void)br_add_if((struct net_bridge *)bridge_dev->priv, eth0_dev);
+        (void)br_add_if(bridge_br, eth0_dev);
     }
-    (void)request_irq(netif->irq, netif_be_int, 0, "netif-backend", netif);
+
+    rtnl_unlock();
+
+    (void)request_irq(netif->irq, netif_be_int, 0, netif->dev->name, netif);
     netif_start_queue(netif->dev);
 
     connect->status = NETIF_BE_STATUS_OKAY;
@@ -271,8 +290,11 @@ int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id)
 void netif_interface_init(void)
 {
     memset(netif_hash, 0, sizeof(netif_hash));
-    if ( br_add_bridge("netif-backend") != 0 )
+    if ( br_add_bridge("nbe-br") != 0 )
         BUG();
-    bridge_dev = __dev_get_by_name("netif-be-bridge");
-    (void)dev_open(bridge_dev);
+    bridge_dev = __dev_get_by_name("nbe-br");
+    bridge_br  = (struct net_bridge *)bridge_dev->priv;
+    bridge_br->bridge_hello_time    = bridge_br->hello_time    = 0;
+    bridge_br->bridge_forward_delay = bridge_br->forward_delay = 0;
+    bridge_br->stp_enabled = 0;
 }
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
index 5b84eba9bc..62a4adf27d 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
@@ -14,7 +14,7 @@
 #include <asm/hypervisor-ifs/dom_mem_ops.h>
 
 static void net_tx_action(unsigned long unused);
-static void tx_skb_release(struct sk_buff *skb);
+static void netif_page_release(struct page *page);
 static void make_tx_response(netif_t *netif,
                              u16      id,
                              s8       st);
@@ -30,13 +30,13 @@
 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
 
 #define tx_work_exists(_if) (1)
 
 #define MAX_PENDING_REQS 256
-unsigned long mmap_vstart;
+static unsigned long mmap_vstart;
 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
 
 #define PKT_PROT_LEN (ETH_HLEN + 20)
 
-/*static pending_req_t pending_reqs[MAX_PENDING_REQS];*/
 static u16 pending_id[MAX_PENDING_REQS];
+static netif_t *pending_netif[MAX_PENDING_REQS];
 static u16 pending_ring[MAX_PENDING_REQS];
 static spinlock_t pend_prod_lock = SPIN_LOCK_UNLOCKED;
 typedef unsigned int PEND_RING_IDX;
@@ -60,8 +60,7 @@ static void __refresh_mfn_list(void)
     op.u.increase.pages = mfn_list;
     if ( (ret = HYPERVISOR_dom_mem_op(&op)) != MAX_MFN_ALLOC )
     {
-        printk(KERN_WARNING "Unable to increase memory reservation (%d)\n",
-               ret);
+        printk(KERN_ALERT "Unable to increase memory reservation (%d)\n", ret);
         BUG();
     }
     alloc_index = MAX_MFN_ALLOC;
@@ -100,10 +99,10 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     netif_t *netif = (netif_t *)dev->priv;
     s8 status = NETIF_RSP_OKAY;
-    u16 size, id;
+    u16 size=0, id;
     mmu_update_t mmu[6];
     pgd_t *pgd; pmd_t *pmd; pte_t *pte;
-    unsigned long vdata, new_mfn;
+    unsigned long vdata, mdata=0, new_mfn;
 
     /* Drop the packet if the target domain has no receive buffers. */
     if ( (netif->rx_req_cons == netif->rx->req_prod) ||
@@ -126,16 +125,23 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
          (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) ||
          ((skb->end - skb->head) < (PAGE_SIZE/2)) )
     {
-        struct sk_buff *nskb = dev_alloc_skb(PAGE_SIZE-1024);
+        struct sk_buff *nskb = alloc_skb(PAGE_SIZE-1024, GFP_ATOMIC);
         int hlen = skb->data - skb->head;
+        if ( unlikely(nskb == NULL) )
+        {
+            DPRINTK("DOM%llu couldn't get memory for skb.\n", netif->domid);
+            status = NETIF_RSP_ERROR;
+            goto out;
+        }
         skb_reserve(nskb, hlen);
-        skb_put(nskb, skb->len);
+        __skb_put(nskb, skb->len);
         (void)skb_copy_bits(skb, -hlen, nskb->head, hlen + skb->len);
         dev_kfree_skb(skb);
         skb = nskb;
     }
 
     vdata = (unsigned long)skb->data;
+    mdata = virt_to_machine(vdata);
     size  = skb->tail - skb->data;
 
     new_mfn = get_new_mfn();
@@ -153,7 +159,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
     mmu[1].ptr |= MMU_EXTENDED_COMMAND;
     mmu[1].val |= MMUEXT_SET_SUBJECTDOM_H;
 
-    mmu[2].ptr  = virt_to_machine(vdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
+    mmu[2].ptr  = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
     mmu[2].val  = MMUEXT_REASSIGN_PAGE;
 
     mmu[3].ptr  = MMU_EXTENDED_COMMAND;
@@ -167,6 +173,7 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
     if ( unlikely(HYPERVISOR_mmu_update(mmu, 6) < 0) )
     {
+        DPRINTK("Failed MMU update transferring to DOM%llu\n", netif->domid);
         dealloc_mfn(new_mfn);
         status = NETIF_RSP_ERROR;
         goto out;
@@ -174,12 +181,12 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
     phys_to_machine_mapping[__pa(vdata) >> PAGE_SHIFT] = new_mfn;
 
-    netif->stats.tx_bytes += size;
-    netif->stats.tx_packets++;
+    netif->stats.rx_bytes += size;
+    netif->stats.rx_packets++;
 
  out:
     spin_lock(&netif->rx_lock);
-    make_rx_response(netif, id, status, virt_to_machine(vdata), size);
+    make_rx_response(netif, id, status, mdata, size);
     spin_unlock(&netif->rx_lock);
     dev_kfree_skb(skb);
     return 0;
@@ -220,6 +227,16 @@
     spin_unlock(&net_schedule_list_lock);
 }
 
+static inline void netif_schedule_work(netif_t *netif)
+{
+    if ( (netif->tx_req_cons != netif->tx->req_prod) &&
+         ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
+    {
+        add_to_net_schedule_list_tail(netif);
+        maybe_schedule_tx_action();
+    }
+}
+
 void netif_deschedule(netif_t *netif)
 {
     remove_from_net_schedule_list(netif);
@@ -229,14 +246,8 @@ void netif_deschedule(netif_t *netif)
 static void tx_credit_callback(unsigned long data)
 {
     netif_t *netif = (netif_t *)data;
-
     netif->remaining_credit = netif->credit_bytes;
-
-    if ( tx_work_exists(netif) )
-    {
-        add_to_net_schedule_list_tail(netif);
-        maybe_schedule_tx_action();
-    }
+    netif_schedule_work(netif);
 }
 #endif
 
@@ -249,6 +260,7 @@ static void net_tx_action(unsigned long unused)
     u16 pending_idx;
     NETIF_RING_IDX i;
     pgprot_t prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED);
+    struct page *page;
 
     while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
             !list_empty(&net_schedule_list) )
@@ -261,7 +273,7 @@ static void net_tx_action(unsigned long unused)
 
         /* Work to do? */
         i = netif->tx_req_cons;
-        if ( (i == netif->tx->req_prod) &&
+        if ( (i == netif->tx->req_prod) ||
             ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
         {
             netif_put(netif);
@@ -296,7 +308,7 @@ static void net_tx_action(unsigned long unused)
         netif->remaining_credit -= tx.size;
 #endif
 
-        add_to_net_schedule_list_tail(netif);
+        netif_schedule_work(netif);
 
         if ( unlikely(txreq.size <= PKT_PROT_LEN) ||
              unlikely(txreq.size > ETH_FRAME_LEN) )
@@ -335,6 +347,7 @@ static void net_tx_action(unsigned long unused)
 
         if ( unlikely((skb = alloc_skb(PKT_PROT_LEN, GFP_ATOMIC)) == NULL) )
         {
+            DPRINTK("Can't allocate a skb in start_xmit.\n");
             make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
             netif_put(netif);
             vmfree_area_pages(MMAP_VADDR(pending_idx), PAGE_SIZE);
@@ -346,29 +359,29 @@ static void net_tx_action(unsigned long unused)
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                PKT_PROT_LEN);
 
-        skb->dev        = netif->dev;
-        skb->protocol   = eth_type_trans(skb, skb->dev);
-
+        page = virt_to_page(MMAP_VADDR(pending_idx));
+
         /* Append the packet payload as a fragment. */
-        skb_shinfo(skb)->frags[0].page        =
-            virt_to_page(MMAP_VADDR(pending_idx));
-        skb_shinfo(skb)->frags[0].size        =
-            txreq.size - PKT_PROT_LEN;
+        skb_shinfo(skb)->frags[0].page        = page;
+        skb_shinfo(skb)->frags[0].size        = txreq.size - PKT_PROT_LEN;
         skb_shinfo(skb)->frags[0].page_offset =
             (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
         skb_shinfo(skb)->nr_frags = 1;
         skb->data_len  = txreq.size - PKT_PROT_LEN;
         skb->len      += skb->data_len;
 
+        skb->dev      = netif->dev;
+        skb->protocol = eth_type_trans(skb, skb->dev);
+
         /* Destructor information. */
-        skb->destructor = tx_skb_release;
-        skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].page = (struct page *)netif;
-        skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].size = pending_idx;
+        atomic_set(&page->count, 1);
+        page->mapping = (struct address_space *)netif_page_release;
+        pending_id[pending_idx] = txreq.id;
+        pending_netif[pending_idx] = netif;
 
-        netif->stats.rx_bytes += txreq.size;
-        netif->stats.rx_packets++;
+        netif->stats.tx_bytes += txreq.size;
+        netif->stats.tx_packets++;
 
-        pending_id[pending_idx] = txreq.id;
         pending_cons++;
 
         netif_rx(skb);
@@ -376,28 +389,34 @@ static void net_tx_action(unsigned long unused)
     }
 }
 
-/* Destructor function for tx skbs. */
-static void tx_skb_release(struct sk_buff *skb)
+static void netif_page_release(struct page *page)
 {
     unsigned long flags;
-    netif_t *netif = (netif_t *)skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].page;
-    u16 pending_idx = skb_shinfo(skb)->frags[MAX_SKB_FRAGS-1].size;
+    netif_t *netif;
+    u16 pending_idx;
+
+    pending_idx = page - virt_to_page(mmap_vstart);
+
+    netif = pending_netif[pending_idx];
 
     vmfree_area_pages(MMAP_VADDR(pending_idx), PAGE_SIZE);
-
-    skb_shinfo(skb)->nr_frags = 0;
-
+
     spin_lock(&netif->tx_lock);
     make_tx_response(netif, pending_id[pending_idx], NETIF_RSP_OKAY);
     spin_unlock(&netif->tx_lock);
-
+
+    /*
+     * Scheduling checks must happen after the above response is posted.
+     * This avoids a possible race with a guest OS on another CPU.
+     */
+    mb();
+    netif_schedule_work(netif);
+
     netif_put(netif);
 
     spin_lock_irqsave(&pend_prod_lock, flags);
     pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
     spin_unlock_irqrestore(&pend_prod_lock, flags);
-
-    maybe_schedule_tx_action();
 }
 
 #if 0
@@ -493,9 +512,26 @@ static void make_rx_response(netif_t     *netif,
 
 static int __init init_module(void)
 {
+    int i;
+
+    if ( !(start_info.flags & SIF_INITDOMAIN) )
+        return 0;
+
     netif_interface_init();
-    mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS);
+
+    if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
+        BUG();
+
+    pending_cons = 0;
+    pending_prod = MAX_PENDING_REQS;
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+        pending_ring[i] = i;
+
+    spin_lock_init(&net_schedule_list_lock);
+    INIT_LIST_HEAD(&net_schedule_list);
+
     netif_ctrlif_init();
+
     return 0;
 }
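Both backends now recycle their fixed pool of page-mapping slots through the same free-index ring (pending_ring, pending_prod, pending_cons, initialised full in init_module() above). A condensed sketch of the pattern using the patch's names; the two helper functions themselves are invented for illustration and error handling is elided:

    /* Take a free slot; callers first check NR_PENDING_REQS < MAX_PENDING_REQS.
     * Consumption happens only in the tx tasklet, so no lock is needed here. */
    static u16 example_get_pending_slot(void)
    {
        return pending_ring[MASK_PEND_IDX(pending_cons++)];
    }

    /* Return a slot once its page is released, as netif_page_release() does. */
    static void example_put_pending_slot(u16 pending_idx)
    {
        unsigned long flags;
        spin_lock_irqsave(&pend_prod_lock, flags);
        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
        spin_unlock_irqrestore(&pend_prod_lock, flags);
    }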
diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
index af8e660b7c..cc5ac31e82 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/frontend/main.c
@@ -25,20 +25,18 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
-#include "../netif.h"
+#include <asm/evtchn.h>
+#include <asm/ctrl_if.h>
+#include <asm/hypervisor-ifs/dom_mem_ops.h>
 
-static struct tq_struct netif_statechange_tq;
+#include "../netif.h"
 
 #define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
 
-static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
 static void network_tx_buf_gc(struct net_device *dev);
 static void network_alloc_rx_buffers(struct net_device *dev);
 static void cleanup_module(void);
 
-/* Dynamically-mapped IRQs. */
-static int network_irq, debug_irq;
-
 static struct list_head dev_list;
 
 struct net_private
@@ -47,7 +45,7 @@ struct net_private
     struct net_device *dev;
 
     struct net_device_stats stats;
-    NET_RING_IDX rx_resp_cons, tx_resp_cons;
+    NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
     unsigned int tx_full;
 
     netif_tx_interface_t *tx;
@@ -69,8 +67,8 @@ struct net_private
      * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
      * array is an index into a chain of free entries.
     */
-    struct sk_buff *tx_skbs[XENNET_TX_RING_SIZE+1];
-    struct sk_buff *rx_skbs[XENNET_RX_RING_SIZE+1];
+    struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+    struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
 };
 
 /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
@@ -91,7 +89,7 @@ static struct net_device *find_dev_by_handle(unsigned int handle)
     {
         np = list_entry(ent, struct net_private, list);
         if ( np->handle == handle )
-            return np;
+            return np->dev;
     }
     return NULL;
 }
@@ -100,8 +98,7 @@ static struct net_device *find_dev_by_handle(unsigned int handle)
 static int network_open(struct net_device *dev)
 {
     struct net_private *np = dev->priv;
-    netop_t netop;
-    int i, ret;
+    int i;
 
     if ( np->state != NETIF_STATE_CONNECTED )
         return -EINVAL;
@@ -111,15 +108,16 @@ static int network_open(struct net_device *dev)
     spin_lock_init(&np->tx_lock);
 
     /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
-    for ( i = 0; i <= XENNET_TX_RING_SIZE; i++ )
+    for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
         np->tx_skbs[i] = (void *)(i+1);
-    for ( i = 0; i <= XENNET_RX_RING_SIZE; i++ )
+    for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
         np->rx_skbs[i] = (void *)(i+1);
 
     wmb();
     np->state = NETIF_STATE_ACTIVE;
 
     network_alloc_rx_buffers(dev);
+    np->rx->event = np->rx_resp_cons + 1;
 
     netif_start_queue(dev);
 
@@ -131,18 +129,17 @@ static int network_open(struct net_device *dev)
 
 static void network_tx_buf_gc(struct net_device *dev)
 {
-    NET_RING_IDX i, prod;
+    NETIF_RING_IDX i, prod;
     unsigned short id;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
-    tx_entry_t *tx_ring = np->net_ring->tx_ring;
 
     do {
-        prod = np->net_idx->tx_resp_prod;
+        prod = np->tx->resp_prod;
 
         for ( i = np->tx_resp_cons; i != prod; i++ )
         {
-            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
+            id  = np->tx->ring[MASK_NET_TX_IDX(i)].resp.id;
             skb = np->tx_skbs[id];
             ADD_ID_TO_FREELIST(np->tx_skbs, id);
             dev_kfree_skb_any(skb);
@@ -158,14 +155,14 @@ static void network_tx_buf_gc(struct net_device *dev)
          * in such cases notification from Xen is likely to be the only kick
          * that we'll get.
          */
-        np->net_idx->tx_event =
-            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
+        np->tx->event =
+            prod + ((np->tx->req_prod - prod) >> 1) + 1;
         mb();
     }
-    while ( prod != np->net_idx->tx_resp_prod );
+    while ( prod != np->tx->resp_prod );
 
     if ( np->tx_full &&
-         ((np->net_idx->tx_req_prod - prod) < XENNET_TX_RING_SIZE) )
+         ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
     {
         np->tx_full = 0;
         if ( np->state == NETIF_STATE_ACTIVE )
@@ -189,10 +186,14 @@ static void network_alloc_rx_buffers(struct net_device *dev)
     unsigned short id;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
-    netop_t netop;
-    NET_RING_IDX i = np->net_idx->rx_req_prod;
-
-    if ( unlikely((i - np->rx_resp_cons) == XENNET_RX_RING_SIZE) ||
+    NETIF_RING_IDX i = np->rx->req_prod;
+    dom_mem_op_t op;
+    unsigned long pfn_array[NETIF_RX_RING_SIZE];
+    int ret, nr_pfns = 0;
+    pte_t *pte;
+
+    /* Make sure the batch is large enough to be worthwhile (1/2 ring). */
+    if ( unlikely((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) ||
         unlikely(np->state != NETIF_STATE_ACTIVE) )
         return;
 
@@ -209,13 +210,13 @@ static void network_alloc_rx_buffers(struct net_device *dev)
         id = GET_ID_FROM_FREELIST(np->rx_skbs);
         np->rx_skbs[id] = skb;
 
-        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
-        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr =
-            virt_to_machine(get_ppte(skb->head));
-
-        np->rx_bufs_to_notify++;
+        np->rx->ring[MASK_NET_RX_IDX(i)].req.id = id;
+
+        pte = get_ppte(skb->head);
+        pfn_array[nr_pfns++] = pte->pte_low >> PAGE_SHIFT;
+        queue_l1_entry_update(pte, 0);
     }
-    while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE );
+    while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE );
 
     /*
      * We may have allocated buffers which have entries outstanding in the page
@@ -223,17 +224,16 @@ static void network_alloc_rx_buffers(struct net_device *dev)
      */
     flush_page_update_queue();
 
-    np->net_idx->rx_req_prod = i;
-    np->net_idx->rx_event    = np->rx_resp_cons + 1;
-
-    /* Batch Xen notifications. */
-    if ( np->rx_bufs_to_notify > (XENNET_RX_RING_SIZE/4) )
+    op.op = MEMOP_RESERVATION_DECREASE;
+    op.u.decrease.size  = nr_pfns;
+    op.u.decrease.pages = pfn_array;
+    if ( (ret = HYPERVISOR_dom_mem_op(&op)) != nr_pfns )
     {
-        netop.cmd = NETOP_PUSH_BUFFERS;
-        netop.vif = np->idx;
-        (void)HYPERVISOR_net_io_op(&netop);
-        np->rx_bufs_to_notify = 0;
+        printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
+        BUG();
     }
+
+    np->rx->req_prod = i;
 }
 
 
@@ -241,9 +241,8 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
     unsigned short id;
     struct net_private *np = (struct net_private *)dev->priv;
-    tx_req_entry_t *tx;
-    netop_t netop;
-    NET_RING_IDX i;
+    netif_tx_request_t *tx;
+    NETIF_RING_IDX i;
 
     if ( unlikely(np->tx_full) )
     {
@@ -262,27 +261,27 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
         memcpy(new_skb->data, skb->data, skb->len);
         dev_kfree_skb(skb);
         skb = new_skb;
-    }
+    }
 
     spin_lock_irq(&np->tx_lock);
 
-    i = np->net_idx->tx_req_prod;
+    i = np->tx->req_prod;
 
     id = GET_ID_FROM_FREELIST(np->tx_skbs);
     np->tx_skbs[id] = skb;
 
-    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
+    tx = &np->tx->ring[MASK_NET_TX_IDX(i)].req;
 
     tx->id   = id;
-    tx->addr = phys_to_machine(virt_to_phys(skb->data));
+    tx->addr = virt_to_machine(skb->data);
     tx->size = skb->len;
 
     wmb();
-    np->net_idx->tx_req_prod = i + 1;
+    np->tx->req_prod = i + 1;
 
     network_tx_buf_gc(dev);
 
-    if ( (i - np->tx_resp_cons) == (XENNET_TX_RING_SIZE - 1) )
+    if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) )
     {
         np->tx_full = 1;
         netif_stop_queue(dev);
@@ -295,12 +294,8 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
     /* Only notify Xen if there are no outstanding responses. */
     mb();
-    if ( np->net_idx->tx_resp_prod == i )
-    {
-        netop.cmd = NETOP_PUSH_BUFFERS;
-        netop.vif = np->idx;
-        (void)HYPERVISOR_net_io_op(&netop);
-    }
+    if ( np->tx->resp_prod == i )
+        notify_via_evtchn(np->evtchn);
 
     return 0;
 }
@@ -312,22 +307,24 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
     struct net_private *np = dev->priv;
     unsigned long flags;
     struct sk_buff *skb;
-    rx_resp_entry_t *rx;
-    NET_RING_IDX i;
+    netif_rx_response_t *rx;
+    NETIF_RING_IDX i;
+    mmu_update_t mmu[2];
+    pte_t *pte;
 
     spin_lock_irqsave(&np->tx_lock, flags);
     network_tx_buf_gc(dev);
     spin_unlock_irqrestore(&np->tx_lock, flags);
 
  again:
-    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
+    for ( i = np->rx_resp_cons; i != np->rx->resp_prod; i++ )
     {
-        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
+        rx = &np->rx->ring[MASK_NET_RX_IDX(i)].resp;
 
         skb = np->rx_skbs[rx->id];
         ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
 
-        if ( unlikely(rx->status != RING_STATUS_OK) )
+        if ( unlikely(rx->status <= 0) )
         {
             /* Gate this error. We get a (valid) slew of them on suspend. */
             if ( np->state == NETIF_STATE_ACTIVE )
@@ -336,6 +333,17 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
             continue;
         }
 
+        /* Remap the page. */
+        pte = get_ppte(skb->head);
+        mmu[0].ptr  = virt_to_machine(pte);
+        mmu[0].val  = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+        mmu[1].ptr  = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+        mmu[1].val  = __pa(skb->head) >> PAGE_SHIFT;
+        if ( HYPERVISOR_mmu_update(mmu, 2) != 0 )
+            BUG();
+        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+            rx->addr >> PAGE_SHIFT;
+
         /*
          * Set up shinfo -- from alloc_skb This was particularily nasty:  the
         * shared info is hidden at the back of the data area (presumably so it
@@ -348,13 +356,13 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
         phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
             (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
 
-        skb->data = skb->tail = skb->head + rx->offset;
-        skb_put(skb, rx->size);
+        skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK);
+        skb_put(skb, rx->status);
         skb->protocol = eth_type_trans(skb, dev);
 
         np->stats.rx_packets++;
 
-        np->stats.rx_bytes += rx->size;
+        np->stats.rx_bytes += rx->status;
         netif_rx(skb);
         dev->last_rx = jiffies;
     }
@@ -362,10 +370,11 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
     np->rx_resp_cons = i;
 
     network_alloc_rx_buffers(dev);
+    np->rx->event = np->rx_resp_cons + 1;
 
     /* Deal with hypervisor racing our resetting of rx_event. */
     mb();
-    if ( np->net_idx->rx_resp_prod != i )
+    if ( np->rx->resp_prod != i )
         goto again;
 }
 
@@ -373,16 +382,11 @@ static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
 static int network_close(struct net_device *dev)
 {
     struct net_private *np = dev->priv;
-    netop_t netop;
 
     netif_stop_queue(np->dev);
 
-    netop.cmd = NETOP_FLUSH_BUFFERS;
-    netop.vif = np->idx;
-    (void)HYPERVISOR_net_io_op(&netop);
-
-    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
-            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
+    while ( (np->rx_resp_cons != np->rx->req_prod) ||
+            (np->tx_resp_cons != np->tx->req_prod) )
     {
         barrier();
         current->state = TASK_INTERRUPTIBLE;
@@ -406,55 +410,12 @@ static struct net_device_stats *network_get_stats(struct net_device *dev)
 }
 
 
-static void netif_bringup_phase1(void *unused)
+static void netif_status_change(netif_fe_interface_status_changed_t *status)
 {
     ctrl_msg_t                   cmsg;
     netif_fe_interface_connect_t up;
     struct net_device *dev;
     struct net_private *np;
-
-    dev = find_dev_by_handle(0);
-    np  = dev->priv;
-
-    /* Move from CLOSED to DISCONNECTED state. */
-    np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
-    np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
-    memset(np->tx, 0, PAGE_SIZE);
-    memset(np->rx, 0, PAGE_SIZE);
-    np->state = NETIF_STATE_DISCONNECTED;
-
-    /* Construct an interface-CONNECT message for the domain controller. */
-    cmsg.type      = CMSG_NETIF_FE;
-    cmsg.subtype   = CMSG_NETIF_FE_INTERFACE_CONNECT;
-    cmsg.length    = sizeof(netif_fe_interface_connect_t);
-    up.handle      = 0;
-    up.tx_shmem_frame = virt_to_machine(np->tx) >> PAGE_SHIFT;
-    up.rx_shmem_frame = virt_to_machine(np->rx) >> PAGE_SHIFT;
-    memcpy(cmsg.msg, &up, sizeof(up));
-
-    /* Tell the controller to bring up the interface. */
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-static void netif_bringup_phase2(void *unused)
-{
-    struct net_device *dev;
-    struct net_private *np;
-
-    dev = find_dev_by_handle(0);
-    np  = dev->priv;
-
-    np->irq = bind_evtchn_to_irq(np->evtchn);
-    (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM,
-                      "netif", dev);
-
-    np->state = NETIF_STATE_CONNECTED;
-}
-
-static void netif_status_change(netif_fe_interface_status_changed_t *status)
-{
-    struct net_device *dev;
-    struct net_private *np;
 
     if ( status->handle != 0 )
     {
@@ -470,31 +431,53 @@ static void netif_status_change(netif_fe_interface_status_changed_t *status)
     {
     case NETIF_INTERFACE_STATUS_DESTROYED:
         printk(KERN_WARNING "Unexpected netif-DESTROYED message in state %d\n",
-               netif_state);
+               np->state);
         break;
 
     case NETIF_INTERFACE_STATUS_DISCONNECTED:
         if ( np->state != NETIF_STATE_CLOSED )
         {
             printk(KERN_WARNING "Unexpected netif-DISCONNECTED message"
-                   " in state %d\n", netif_state);
+                   " in state %d\n", np->state);
             break;
         }
-        netif_statechange_tq.routine = netif_bringup_phase1;
-        schedule_task(&netif_statechange_tq);
+
+        /* Move from CLOSED to DISCONNECTED state. */
+        np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
+        np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
+        memset(np->tx, 0, PAGE_SIZE);
+        memset(np->rx, 0, PAGE_SIZE);
+        np->state = NETIF_STATE_DISCONNECTED;
+
+        /* Construct an interface-CONNECT message for the domain controller. */
+        cmsg.type      = CMSG_NETIF_FE;
+        cmsg.subtype   = CMSG_NETIF_FE_INTERFACE_CONNECT;
+        cmsg.length    = sizeof(netif_fe_interface_connect_t);
+        up.handle      = 0;
+        up.tx_shmem_frame = virt_to_machine(np->tx) >> PAGE_SHIFT;
+        up.rx_shmem_frame = virt_to_machine(np->rx) >> PAGE_SHIFT;
+        memcpy(cmsg.msg, &up, sizeof(up));
+
+        /* Tell the controller to bring up the interface. */
+        ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
         break;
 
     case NETIF_INTERFACE_STATUS_CONNECTED:
        if ( np->state == NETIF_STATE_CLOSED )
        {
             printk(KERN_WARNING "Unexpected netif-CONNECTED message"
-                   " in state %d\n", netif_state);
+                   " in state %d\n", np->state);
             break;
         }
-        np->evtchn = status->evtchn;
+
         memcpy(dev->dev_addr, status->mac, ETH_ALEN);
-        netif_statechange_tq.routine = netif_bringup_phase2;
-        schedule_task(&netif_statechange_tq);
+
+        np->evtchn = status->evtchn;
+        np->irq    = bind_evtchn_to_irq(np->evtchn);
+        (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM,
+                          dev->name, dev);
+
+        np->state = NETIF_STATE_CONNECTED;
         break;
 
     default:
@@ -532,10 +515,13 @@ static int __init init_module(void)
 {
     ctrl_msg_t                       cmsg;
     netif_fe_driver_status_changed_t st;
-    int i, err;
+    int err;
     struct net_device *dev;
     struct net_private *np;
 
+    if ( start_info.flags & SIF_INITDOMAIN )
+        return 0;
+
     INIT_LIST_HEAD(&dev_list);
 
     if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL )
@@ -562,7 +548,8 @@ static int __init init_module(void)
     np->dev = dev;
     list_add(&np->list, &dev_list);
 
-    (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
+    (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
 
     /* Send a driver-UP notification to the domain controller. */
     cmsg.type      = CMSG_NETIF_FE;
diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
index 715f707eb0..19cb9a3326 100644
--- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
+++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c
@@ -33,8 +33,19 @@ static struct irqaction ctrl_if_irq_action;
 static CONTROL_RING_IDX ctrl_if_tx_resp_cons;
 static CONTROL_RING_IDX ctrl_if_rx_req_cons;
 
-/* Incoming message requests: primary message type -> message handler. */
+/* Incoming message requests. */
+    /* Primary message type -> message handler. */
 static ctrl_msg_handler_t ctrl_if_rxmsg_handler[256];
+    /* Primary message type -> callback in process context? */
+static unsigned long ctrl_if_rxmsg_blocking_context[256/sizeof(unsigned long)];
+    /* Is it late enough during bootstrap to use schedule_task()? */
+static int safe_to_schedule_task;
+    /* Passed to schedule_task(). */
+static struct tq_struct ctrl_if_rxmsg_deferred_tq;
+    /* Queue up messages to be handled in process context. */
+static ctrl_msg_t ctrl_if_rxmsg_deferred[CONTROL_RING_SIZE];
+static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_prod;
+static CONTROL_RING_IDX ctrl_if_rxmsg_deferred_cons;
 
 /* Incoming message responses: message identifier -> message handler/id. */
 static struct {
@@ -99,22 +110,40 @@ static void __ctrl_if_tx_tasklet(unsigned long data)
     }
 }
 
+static void __ctrl_if_rxmsg_deferred(void *unused)
+{
+    ctrl_msg_t *msg;
+
+    while ( ctrl_if_rxmsg_deferred_cons != ctrl_if_rxmsg_deferred_prod )
+    {
+        msg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
+            ctrl_if_rxmsg_deferred_cons++)];
+        (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
+    }
+}
+
 static void __ctrl_if_rx_tasklet(unsigned long data)
 {
     control_if_t *ctrl_if = get_ctrl_if();
-    ctrl_msg_t   *msg;
+    ctrl_msg_t    msg, *pmsg;
 
     while ( ctrl_if_rx_req_cons != ctrl_if->rx_req_prod )
     {
-        /*
-         * We need no locking or barriers here. There will be one and only one
-         * response as a result of each callback, so the callback handler
-         * doesn't need to worry about the 'msg' being overwritten until:
-         *  1. It returns (if the message must persist then it must be copied).
-         *  2. A response is sent (the response may overwrite the request).
-         */
-        msg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
-        (*ctrl_if_rxmsg_handler[msg->type])(msg, 0);
+        pmsg = &ctrl_if->rx_ring[MASK_CONTROL_IDX(ctrl_if_rx_req_cons++)];
+        memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
+        if ( msg.length != 0 )
+            memcpy(msg.msg, pmsg->msg, msg.length);
+        if ( test_bit(msg.type, &ctrl_if_rxmsg_blocking_context) )
+        {
+            pmsg = &ctrl_if_rxmsg_deferred[MASK_CONTROL_IDX(
+                ctrl_if_rxmsg_deferred_prod++)];
+            memcpy(pmsg, &msg, offsetof(ctrl_msg_t, msg) + msg.length);
+            schedule_task(&ctrl_if_rxmsg_deferred_tq);
+        }
+        else
+        {
+            (*ctrl_if_rxmsg_handler[msg.type])(&msg, 0);
+        }
     }
 }
 
@@ -243,22 +272,36 @@ void ctrl_if_send_response(ctrl_msg_t *msg)
     ctrl_if_notify_controller();
 }
 
-int ctrl_if_register_receiver(u8 type, ctrl_msg_handler_t hnd)
+int ctrl_if_register_receiver(
+    u8 type,
+    ctrl_msg_handler_t hnd,
+    unsigned int flags)
 {
-    unsigned long flags;
+    unsigned long _flags;
     int inuse;
 
-    spin_lock_irqsave(&ctrl_if_lock, flags);
+    spin_lock_irqsave(&ctrl_if_lock, _flags);
 
     inuse = (ctrl_if_rxmsg_handler[type] != ctrl_if_rxmsg_default_handler);
 
     if ( inuse )
+    {
         printk(KERN_INFO "Receiver %p already established for control "
                "messages of type %d.\n", ctrl_if_rxmsg_handler[type], type);
+    }
     else
+    {
         ctrl_if_rxmsg_handler[type] = hnd;
+        clear_bit(type, &ctrl_if_rxmsg_blocking_context);
+        if ( flags == CALLBACK_IN_BLOCKING_CONTEXT )
+        {
+            set_bit(type, &ctrl_if_rxmsg_blocking_context);
+            if ( !safe_to_schedule_task )
+                BUG();
+        }
+    }
 
-    spin_unlock_irqrestore(&ctrl_if_lock, flags);
+    spin_unlock_irqrestore(&ctrl_if_lock, _flags);
 
     return !inuse;
 }
@@ -326,6 +369,7 @@ void __init ctrl_if_init(void)
 
     for ( i = 0; i < 256; i++ )
         ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
+    ctrl_if_rxmsg_deferred_tq.routine = __ctrl_if_rxmsg_deferred;
 
     spin_lock_init(&ctrl_if_lock);
 
@@ -333,6 +377,15 @@ void __init ctrl_if_init(void)
 }
 
 
+/* This is called after it is safe to call schedule_task(). */
+static int __init ctrl_if_late_setup(void)
+{
+    safe_to_schedule_task = 1;
+    return 0;
+}
+__initcall(ctrl_if_late_setup);
+
+
 /*
  * !! The following are DANGEROUS FUNCTIONS !!
  * Use with care [for example, see xencons_force_flush()].
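A usage sketch for the extended registration API: a receiver registered with CALLBACK_IN_BLOCKING_CONTEXT has its messages copied off the ring and delivered via schedule_task(), so the handler may sleep; that is what lets the blkif and netif drivers above move their allocations from GFP_ATOMIC to GFP_KERNEL. CMSG_EXAMPLE and both functions below are invented for illustration, while ctrl_if_register_receiver() and ctrl_if_send_response() are the patch's own API.

    /* Illustrative handler: runs in process context, so it may block. */
    static void example_rx(ctrl_msg_t *msg, unsigned long id)
    {
        /* Handle the message, then acknowledge it on the control ring. */
        msg->length = 0;
        ctrl_if_send_response(msg);
    }

    static int __init example_init(void)
    {
        (void)ctrl_if_register_receiver(CMSG_EXAMPLE, example_rx,
                                        CALLBACK_IN_BLOCKING_CONTEXT);
        return 0;
    }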