author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>   2003-04-17 17:12:24 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>   2003-04-17 17:12:24 +0000
commit     afb32424f491d60cad70f27f7b79e8828ea9238b
tree       1d3ef06de63ea8a9404a4284add85f0675574cd6
parent     f967810e1518a302a01a84e0719e3628202156c2
parent     2ccb44d10f5c07368a4191d59a7f413d20872acc
bitkeeper revision 1.181 (3e9ee078FteSsgdMh0SKyoaSZBrrhg)
Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xeno
-rw-r--r--  xen/TODO                                                          |  48
-rw-r--r--  xen/common/network.c                                              |  31
-rw-r--r--  xen/include/hypervisor-ifs/network.h                              |  80
-rw-r--r--  xen/include/xeno/sched.h                                          |   2
-rw-r--r--  xen/include/xeno/vif.h                                            |  50
-rw-r--r--  xen/net/dev.c                                                     | 388
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c  | 132
7 files changed, 368 insertions, 363 deletions
diff --git a/xen/TODO b/xen/TODO
--- a/xen/TODO
+++ b/xen/TODO
@@ -7,20 +7,7 @@ longer-term goals. -- Keir (16/3/03)
 
-1. FIX HANDLING OF NETWORK RINGS
---------------------------------
-Handling of the transmit rings is currently very broken (for example,
-sending an inter-domain packet will wedge the hypervisor). This is
-because we may handle packets out of order (eg. inter-domain packets
-are handled eagerly, while packets for real interfaces are queued),
-but our current ring design really assumes in-order handling.
-
-A neat fix will be to allow responses to be queued in a different
-order to requests, just as we already do with block-device
-rings. We'll need to add an opaque identifier to ring entries,
-allowing matching of requests and responses, but that's about it.
-
-2. ACCURATE TIMERS AND WALL-CLOCK TIME
+1. ACCURATE TIMERS AND WALL-CLOCK TIME
 --------------------------------------
 Currently our long-term timebase free runs on CPU0, with no external
 calibration. We should run ntpd on domain 0 and allow this to warp
@@ -28,7 +15,7 @@ Xen's timebase. Once this is done, we can have a timebase per CPU and
 not worry about relative drift (since they'll all get sync'ed
 periodically by ntp).
 
-3. ASSIGNING DOMAINS TO PROCESSORS
+2. ASSIGNING DOMAINS TO PROCESSORS
 ----------------------------------
 More intelligent assignment of domains to processors. In
 particular, we don't play well with hyperthreading: we will assign
@@ -40,17 +27,17 @@ relationships between processors in the system
 (eg. which ones are siblings in the same package). We then use this
 to balance domains across packages, and across virtual processors
 within a package.
 
-4. PROPER DESTRUCTION OF DOMAINS
---------------------------------
-Currently we do not free resources when destroying a domain. This is
-because they may be tied up in subsystems, and there is no way of
-pulling them back in a safe manner.
+3. DOMAIN 0 MANAGEMENT DAEMON
+-----------------------------
+A better control daemon is required for domain 0, which keeps proper
+track of machine resources and can make sensible policy choices. This
+may require support in Xen; for example, notifications (eg. DOMn is
+killed), and requests (eg. can DOMn allocate x frames of memory?).
 
-The fix is probably to reference count resources and automatically
-free them when the count reaches zero. We may get away with one count
-per domain (for all its resources). When this reaches zero we know it
-is safe to free everything: block-device rings, network rings, and all
-the rest.
+4. SANE NETWORK ROUTING
+-----------------------
+The current virtual firewall/router is completely broken. Needs a new
+design and implementation!
 
 5. NETWORK CHECKSUM OFFLOAD
 ---------------------------
@@ -60,14 +47,7 @@ indicate, on transmit, which packets need the checksum added and, on
 receive, which packets have been checked out as okay. We can steal
 Linux's interface, which is entirely sane given NIC limitations.
 
-6. DOMAIN 0 MANAGEMENT DAEMON
------------------------------
-A better control daemon is required for domain 0, which keeps proper
-track of machine resources and can make sensible policy choices. This
-may require support in Xen; for example, notifications (eg. DOMn is
-killed), and requests (eg. can DOMn allocate x frames of memory?).
-
-7. MODULE SUPPORT FOR XEN
+6. MODULE SUPPORT FOR XEN
 -------------------------
 Network and blkdev drivers are bloating Xen. At some point we want to
 build drivers as modules, stick them in a cheesy ramfs, then relocate
@@ -79,7 +59,7 @@ which drivers to load.
 Most of the hard stuff (relocating and the like) is done for us by
 Linux's module system.
 
-8. NEW DESIGN FEATURES
+7. NEW DESIGN FEATURES
 ----------------------
 
 This includes the last-chance page cache, and the unified buffer cache.
diff --git a/xen/common/network.c b/xen/common/network.c
index 9e1bf7a00b..2cdf11a9a5 100644
--- a/xen/common/network.c
+++ b/xen/common/network.c
@@ -5,7 +5,7 @@
  * with the virtual interfaces (vifs) and the virtual firewall/router through
  * the use of rules.
  *
- * Copyright (c) 2002, A K Warfield and K A Fraser
+ * Copyright (c) 2002-2003, A K Warfield and K A Fraser
  */
 
 #include <hypervisor-ifs/network.h>
@@ -67,7 +67,8 @@ net_vif_t *create_net_vif(int domain)
     shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
     if ( shadow_ring == NULL ) goto fail;
-    
+    memset(shadow_ring, 0, sizeof(*shadow_ring));
+
     shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
                                    * sizeof(rx_shadow_entry_t), GFP_KERNEL);
     shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
@@ -75,9 +76,6 @@ net_vif_t *create_net_vif(int domain)
     if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
         goto fail;
 
-    shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
-    shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
-
     /*
      * Fill in the new vif struct. Note that, while the vif's refcnt is
      * non-zero, we hold a reference to the task structure.
@@ -121,7 +119,7 @@ void destroy_net_vif(net_vif_t *vif)
     /* Return any outstanding receive buffers to the guest OS. */
     spin_lock_irqsave(&p->page_lock, flags);
     for ( i = vif->shadow_ring->rx_idx;
-          i != vif->shadow_ring->rx_prod;
+          i != vif->shadow_ring->rx_req_cons;
           i = ((i+1) & (RX_RING_SIZE-1)) )
     {
         rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
@@ -263,7 +261,7 @@ void add_default_net_rule(int vif_id, u32 ipaddr)
     memset(&new_rule, 0, sizeof(net_rule_t));
     new_rule.dst_addr = ipaddr;
     new_rule.dst_addr_mask = 0xffffffff;
-    new_rule.src_interface = VIF_PHYSICAL_INTERFACE;
+    new_rule.src_interface = VIF_ANY_INTERFACE;
     new_rule.dst_interface = vif_id;
     new_rule.action = NETWORK_ACTION_ACCEPT;
     new_rule.proto = NETWORK_PROTO_ANY;
@@ -319,9 +317,8 @@ void print_net_rule_list()
  * Apply the rules to this skbuff and return the vif id that it is bound for.
  * If there is no match, VIF_DROP is returned.
  */
-
-int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port, u16 dst_port,
-                 int src_vif)
+int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr,
+                  u16 src_port, u16 dst_port, int src_vif)
 {
     net_rule_ent_t *ent;
     int dest = VIF_DROP;
@@ -330,7 +327,7 @@ int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port
 
     ent = net_rule_list;
 
-    while (ent)
+    while ( ent != NULL )
     {
         if ( ((ent->r.src_interface == src_vif) ||
               (ent->r.src_interface == VIF_ANY_INTERFACE)) &&
@@ -351,12 +348,19 @@ int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port
               (tproto == IPPROTO_UDP))) )
         {
-            break;
+            /*
+             * XXX FFS! We keep going to find the "best" rule. Where best
+             * corresponds to vaguely sane routing of a packet. We need a less
+             * shafted model for aour "virtual firewall/router" methinks!
+             */
+            if ( dest < 0 )
+                dest = ent->r.dst_interface;
+            if ( dest >= 0 )
+                break;
         }
         ent = ent->next;
     }
-    if (ent) (dest = ent->r.dst_interface);
 
     read_unlock(&net_rule_lock);
     return dest;
 }
@@ -423,6 +427,7 @@ int __net_get_target_vif(u8 *data, unsigned int len, int src_vif)
     return target;
     
  drop:
+    printk("VIF%d: pkt to drop!\n", src_vif);
     return VIF_DROP;
 }
 
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h
index 56a8f92881..4d4cfe93e6 100644
--- a/xen/include/hypervisor-ifs/network.h
+++ b/xen/include/hypervisor-ifs/network.h
@@ -14,50 +14,70 @@
 
 #include <linux/types.h>
 
-typedef struct tx_entry_st {
-    unsigned long addr;   /* machine address of packet (IN VAR) */
-    unsigned short size;  /* in bytes (IN VAR) */
-    unsigned char status; /* per descriptor status (OUT VAR) */
-    unsigned char _unused;
+
+typedef struct tx_req_entry_st
+{
+    unsigned long  id;
+    unsigned long  addr;   /* machine address of packet */
+    unsigned short size;   /* packet size in bytes */
+} tx_req_entry_t;
+
+typedef struct tx_resp_entry_st
+{
+    unsigned long  id;
+    unsigned char  status;
+} tx_resp_entry_t;
+
+typedef union tx_entry_st
+{
+    tx_req_entry_t  req;
+    tx_resp_entry_t resp;
 } tx_entry_t;
 
-typedef struct rx_entry_st {
-    unsigned long addr;   /* machine address of PTE to swizzle (IN VAR) */
-    unsigned short size;  /* in bytes (OUT VAR) */
-    unsigned char status; /* per descriptor status (OUT VAR) */
-    unsigned char offset; /* offset in page of received pkt (OUT VAR) */
+
+typedef struct rx_req_entry_st
+{
+    unsigned long  id;
+    unsigned long  addr;   /* machine address of PTE to swizzle */
+} rx_req_entry_t;
+
+typedef struct rx_resp_entry_st
+{
+    unsigned long  id;
+    unsigned short size;   /* received packet size in bytes */
+    unsigned char  status; /* per descriptor status */
+    unsigned char  offset; /* offset in page of received pkt */
+} rx_resp_entry_t;
+
+typedef union rx_entry_st
+{
+    rx_req_entry_t  req;
+    rx_resp_entry_t resp;
 } rx_entry_t;
 
+
 #define TX_RING_SIZE 256
 #define RX_RING_SIZE 256
 
-typedef struct net_ring_st {
+
+typedef struct net_ring_st
+{
     /*
-     * Guest OS places packets into ring at tx_prod.
-     * Hypervisor removes at tx_cons.
-     * Ring is empty when tx_prod == tx_cons.
-     * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
-     * Hypervisor may be prodded whenever tx_prod is updated, but this is
-     * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+     * Guest OS places packets into ring at tx_req_prod.
+     * Guest OS receives DOMAIN_EVENT_NET_TX when tx_resp_prod passes tx_event.
      */
     tx_entry_t *tx_ring;
-    unsigned int tx_prod, tx_cons, tx_event;
+    unsigned int tx_req_prod, tx_resp_prod, tx_event;
 
     /*
-     * Guest OS places empty buffers into ring at rx_prod.
-     * Hypervisor fills buffers as rx_cons.
-     * Ring is empty when rx_prod == rx_cons.
-     * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
-     * Hypervisor may be prodded whenever rx_prod is updated, but this is
-     * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+     * Guest OS places empty buffers into ring at rx_req_prod.
+     * Guest OS receives DOMAIN_EVENT_NET_RX when rx_rssp_prod passes rx_event.
      */
     rx_entry_t *rx_ring;
-    unsigned int rx_prod, rx_cons, rx_event;
+    unsigned int rx_req_prod, rx_resp_prod, rx_event;
 } net_ring_t;
 
-/* Specify base of per-domain array. Get returned free slot in the array. */
-/*net_ring_t *create_net_vif(int domain);*/
-
-/* Packet routing/filtering code follows:
+/*
+ * Packet routing/filtering code follows:
  */
 
 #define NETWORK_ACTION_ACCEPT  0
@@ -89,7 +109,7 @@ typedef struct net_rule_st
 typedef struct vif_query_st
 {
     unsigned int domain;
-    char *buf;   // where to put the reply -- guest virtual address
+    char *buf;   /* reply buffer -- guest virtual address */
 } vif_query_t;
 
 /* Network trap operations and associated structure.
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 5534ee45f3..3bc997bed3 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -50,7 +50,7 @@ extern struct mm_struct init_mm;
 }
 
 #define _HYP_EVENT_NEED_RESCHED 0
-#define _HYP_EVENT_NET_RX       1
+#define _HYP_EVENT_NET          1
 #define _HYP_EVENT_DIE          2
 
 #define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h
index 22c6c25392..730c1cb084 100644
--- a/xen/include/xeno/vif.h
+++ b/xen/include/xeno/vif.h
@@ -3,7 +3,7 @@
  * This is the hypervisor end of the network code. The net_ring structure
  * stored in each vif is placed on a shared page to interact with the guest VM.
  *
- * Copyright (c) 2002, A K Warfield and K A Fraser
+ * Copyright (c) 2002-2003, A K Warfield and K A Fraser
  */
 
 /* virtual network interface struct and associated defines. */
@@ -25,45 +25,51 @@
  * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
  */
 
-typedef struct rx_shadow_entry_st {
+typedef struct rx_shadow_entry_st
+{
+    unsigned long  id;
+    /* IN vars */
     unsigned long  addr;
+    /* OUT vars */
     unsigned short size;
     unsigned char  status;
     unsigned char  offset;
+    /* PRIVATE vars */
     unsigned long  flush_count;
 } rx_shadow_entry_t;
 
-typedef struct tx_shadow_entry_st {
+typedef struct tx_shadow_entry_st
+{
+    unsigned long  id;
+    /* IN vars */
     void          *header;
     unsigned long  payload;
     unsigned short size;
+    /* OUT vars */
     unsigned char  status;
-    unsigned char  _unused;
 } tx_shadow_entry_t;
 
 typedef struct net_shadow_ring_st {
     rx_shadow_entry_t *rx_ring;
-    tx_shadow_entry_t *tx_ring;
-
-    /*
-     * Private copy of producer. Follows guest OS version, but never
-     * catches up with our consumer index.
-     */
-    unsigned int rx_prod;
-    /* Points at next buffer to be filled by NIC. Chases rx_prod. */
-    unsigned int rx_idx;
-    /* Points at next buffer to be returned to the guest OS. Chases rx_idx. */
-    unsigned int rx_cons;
+    unsigned int rx_prod;  /* More buffers for filling go here. */
+    unsigned int rx_idx;   /* Next buffer to fill is here. */
+    unsigned int rx_cons;  /* Next buffer to create response for is here. */
 
+    tx_shadow_entry_t *tx_ring;
     /*
-     * Private copy of producer. Follows guest OS version, but never
-     * catches up with our consumer index.
+     * These cannot be derived from shared variables, as not all packets
+     * will end up on the shadow ring (eg. locally delivered packets).
      */
-    unsigned int tx_prod;
-    /* Points at next buffer to be scheduled. Chases tx_prod. */
-    unsigned int tx_idx;
-    /* Points at next buffer to be returned to the guest OS. Chases tx_idx. */
-    unsigned int tx_cons;
+    unsigned int tx_prod;  /* More packets for sending go here. */
+    unsigned int tx_idx;   /* Next packet to send is here. */
+    unsigned int tx_transmitted_prod; /* Next packet to finish transmission. */
+    unsigned int tx_cons;  /* Next packet to create response for is here. */
+
+    /* Indexes into shared ring. */
+    unsigned int rx_req_cons;
+    unsigned int rx_resp_prod; /* private version of shared variable */
+    unsigned int tx_req_cons;
+    unsigned int tx_resp_prod; /* private version of shared variable */
 } net_shadow_ring_t;
 
 typedef struct net_vif_st {
diff --git a/xen/net/dev.c b/xen/net/dev.c
index cd9c2d18ee..7fbf165b0e 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -49,6 +49,15 @@
 #define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
 #define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
 
+static void make_tx_response(net_vif_t     *vif,
+                             unsigned long  id,
+                             unsigned char  st);
+static void make_rx_response(net_vif_t     *vif,
+                             unsigned long  id,
+                             unsigned short size,
+                             unsigned char  st,
+                             unsigned char  off);
+
 struct net_device *the_dev = NULL;
 
 /*
@@ -482,6 +491,49 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 
 struct netif_rx_stats netdev_rx_stat[NR_CPUS];
 
+/*
+ * update_shared_ring(void)
+ *
+ * This replaces flush_rx_queue as the guest event handler to move packets
+ * queued in the guest ring up to the guest.  Really, the packet is already
+ * there, it was page flipped in deliver_packet, but this moves the ring
+ * descriptor across from the shadow ring and increments the pointers.
+ */
+void update_shared_ring(void)
+{
+    rx_shadow_entry_t *rx;
+    tx_shadow_entry_t *tx;
+    net_ring_t *net_ring;
+    net_shadow_ring_t *shadow_ring;
+    net_vif_t *vif;
+    struct list_head *ent;
+
+    clear_bit(_HYP_EVENT_NET, &current->hyp_events);
+
+    list_for_each(ent, &current->net_vifs)
+    {
+        vif = list_entry(ent, net_vif_t, dom_list);
+        net_ring = vif->net_ring;
+        shadow_ring = vif->shadow_ring;
+
+        while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
+        {
+            rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
+            if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
+                __flush_tlb();
+            shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
+            make_rx_response(vif, rx->id, rx->size, rx->status, rx->offset);
+        }
+
+        while ( shadow_ring->tx_cons != shadow_ring->tx_transmitted_prod )
+        {
+            tx = shadow_ring->tx_ring + shadow_ring->tx_cons;
+            shadow_ring->tx_cons = RX_RING_INC(shadow_ring->tx_cons);
+            make_tx_response(vif, tx->id, tx->status);
+        }
+    }
+}
+
 void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 {
     net_shadow_ring_t *shadow_ring;
@@ -489,7 +541,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
     unsigned long *g_pte;
     struct pfn_info *g_pfn, *h_pfn;
     unsigned int i;
-    unsigned long flags;
 
     memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
     if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
@@ -501,17 +552,13 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 
     rx = shadow_ring->rx_ring + i;
 
-    if ( rx->status != RING_STATUS_OK )
-    {
-        DPRINTK("Bad buffer in deliver_packet()\n");
-        goto inc_and_out;
-    }
-
+    ASSERT(rx->status == RING_STATUS_OK);
     ASSERT(skb->len <= PAGE_SIZE);
+
     rx->size   = skb->len;
     rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
 
-    spin_lock_irqsave(&vif->domain->page_lock, flags);
+    spin_lock(&vif->domain->page_lock);
 
     g_pte = map_domain_mem(rx->addr);
 
@@ -541,12 +588,11 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
     list_del(&g_pfn->list);
     list_add(&h_pfn->list, &vif->domain->pg_head);
 
-    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
+    spin_unlock(&vif->domain->page_lock);
 
     /* Our skbuff now points at the guest's old frame. */
     skb->pf = g_pfn;
 
- inc_and_out:
     smp_wmb(); /* updates must happen before releasing the descriptor. */
     shadow_ring->rx_idx = RX_RING_INC(i);
 }
 
@@ -595,11 +641,11 @@ int netif_rx(struct sk_buff *skb)
     if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
         skb->dst_vif = __net_get_target_vif(skb->data, skb->len, skb->src_vif);
 
-    read_lock_irqsave(&sys_vif_lock, flags);
+    read_lock(&sys_vif_lock);
     if ( (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) ||
          ((vif = sys_vif_list[skb->dst_vif]) == NULL) )
     {
-        read_unlock_irqrestore(&sys_vif_lock, flags);
+        read_unlock(&sys_vif_lock);
         netdev_rx_stat[this_cpu].dropped++;
         unmap_domain_mem(skb->head);
         kfree_skb(skb);
@@ -608,10 +654,10 @@ int netif_rx(struct sk_buff *skb)
     }
 
     get_vif(vif);
-    read_unlock_irqrestore(&sys_vif_lock, flags);
+    read_unlock(&sys_vif_lock);
 
     deliver_packet(skb, vif);
-    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET_RX);
+    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
     put_vif(vif);
 
     unmap_domain_mem(skb->head);
@@ -676,10 +722,8 @@ static void add_to_net_schedule_list_tail(net_vif_t *vif)
 /* Destructor function for tx skbs. */
 static void tx_skb_release(struct sk_buff *skb)
 {
-    int i, send = 0;
+    int i;
     net_vif_t *vif = sys_vif_list[skb->src_vif];
-    unsigned int idx;
-    tx_shadow_entry_t *tx;
     unsigned long cpu_mask, flags;
 
     spin_lock_irqsave(&vif->domain->page_lock, flags);
@@ -692,51 +736,10 @@ static void tx_skb_release(struct sk_buff *skb)
 
     skb_shinfo(skb)->nr_frags = 0;
 
-    /* This would mean that the guest OS has fiddled with our index. */
-    if ( vif->shadow_ring->tx_cons != vif->net_ring->tx_cons )
-        DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
-                vif->shadow_ring->tx_cons, vif->net_ring->tx_cons);
-
-    /*
-     * XXX This assumes that, per vif, SKBs are processed in-order!
-     * Also assumes no concurrency. This is safe because each vif
-     * maps to one NIC. This is executed in NIC interrupt code, so we have
-     * mutual exclusion from do_IRQ().
-     */
-
-    smp_wmb(); /* make sure any status updates occur before inc'ing tx_cons. */
-
-    /* Skip over a sequence of bad descriptors, plus the first good one. */
-    do {
-        idx = vif->shadow_ring->tx_cons;
-        /* There must be at least one good descriptor outstanding. */
-        if ( idx == vif->shadow_ring->tx_idx ) BUG();
-        tx  = &vif->shadow_ring->tx_ring[idx];
-        vif->shadow_ring->tx_cons = TX_RING_INC(idx);
-        if ( vif->shadow_ring->tx_cons == vif->net_ring->tx_event ) send = 1;
-    } while ( tx->status != RING_STATUS_OK );
-
-    /* Now skip over any more bad descriptors, up to the next good one. */
-    do {
-        idx = vif->shadow_ring->tx_cons;
-        tx  = &vif->shadow_ring->tx_ring[idx];
-        /* Carry on until we find a good descriptor, or reach scheduler idx. */
-        if ( (idx == vif->shadow_ring->tx_idx) ||
-             (tx->status == RING_STATUS_OK) )
-            break;
-        vif->shadow_ring->tx_cons = TX_RING_INC(idx);
-        if ( vif->shadow_ring->tx_cons == vif->net_ring->tx_event ) send = 1;
-    } while ( 1 );
-
-    /* Update shared consumer index to the new private value. */
-    vif->net_ring->tx_cons = vif->shadow_ring->tx_cons;
-
-    /* Send a transmit event if requested. */
-    if ( send )
-    {
-        cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
-        guest_event_notify(cpu_mask);
-    }
+    vif->shadow_ring->tx_transmitted_prod =
+        TX_RING_INC(vif->shadow_ring->tx_transmitted_prod);
+    cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
+    hyp_event_notify(cpu_mask);
 
     put_vif(vif);
 }
@@ -765,27 +768,22 @@ static void net_tx_action(unsigned long unused)
             continue;
         }
 
-        /* Pick an entry from the transmit queue. */
-        tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
-        vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
-        if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
-            add_to_net_schedule_list_tail(vif);
-
-        /* Check the chosen entry is good. */
-        if ( tx->status != RING_STATUS_OK )
-        {
-            put_vif(vif);
-            continue;
-        }
-
         if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
         {
             printk("Out of memory in net_tx_action()!\n");
-            tx->status = RING_STATUS_BAD_PAGE;
+            add_to_net_schedule_list_tail(vif);
            put_vif(vif);
            break;
        }

+        /* Pick an entry from the transmit queue. */
+        tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
+        vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
+        if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+            add_to_net_schedule_list_tail(vif);
+
+        ASSERT(tx->status == RING_STATUS_OK);
+
         skb->destructor = tx_skb_release;
 
         skb->head = skb->data = tx->header;
@@ -828,57 +826,6 @@ static inline void maybe_schedule_tx_action(void)
 
 
 /*
- * update_shared_ring(void)
- *
- * This replaces flush_rx_queue as the guest event handler to move packets
- * queued in the guest ring up to the guest. Really, the packet is already
- * there, it was page flipped in deliver_packet, but this moves the ring
- * descriptor across from the shadow ring and increments the pointers.
- */
-
-void update_shared_ring(void)
-{
-    rx_shadow_entry_t *rx;
-    shared_info_t *s = current->shared_info;
-    net_ring_t *net_ring;
-    net_shadow_ring_t *shadow_ring;
-    net_vif_t *vif;
-    struct list_head *ent;
-
-    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
-
-    list_for_each(ent, &current->net_vifs)
-    {
-        vif = list_entry(ent, net_vif_t, dom_list);
-        net_ring = vif->net_ring;
-        shadow_ring = vif->shadow_ring;
-
-        /* This would mean that the guest OS has fiddled with our index. */
-        if ( shadow_ring->rx_cons != net_ring->rx_cons )
-            DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
-                    shadow_ring->rx_cons, net_ring->rx_cons);
-
-        while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
-        {
-            rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
-            copy_to_user(net_ring->rx_ring + shadow_ring->rx_cons, rx,
-                         sizeof(rx_entry_t));
-
-            if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
-                __flush_tlb();
-
-            smp_wmb(); /* copy descriptor before inc'ing rx_cons */
-            shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
-
-            if ( shadow_ring->rx_cons == net_ring->rx_event )
-                set_bit(_EVENT_NET_RX, &s->events);
-        }
-        net_ring->rx_cons = shadow_ring->rx_cons;
-    }
-}
-
-
-/*
  * We need this ioctl for efficient implementation of the
  * if_indextoname() function required by the IPv6 API. Without
  * it, we would have to search all the interfaces to find a
@@ -1847,10 +1794,10 @@ long do_net_update(void)
     net_ring_t *net_ring;
     net_shadow_ring_t *shadow_ring;
     net_vif_t *current_vif;
-    unsigned int i;
+    unsigned int i, j;
     struct sk_buff *skb;
-    tx_entry_t tx;
-    rx_shadow_entry_t *rx;
+    tx_req_entry_t tx;
+    rx_req_entry_t rx;
     unsigned long pfn;
     struct pfn_info *page;
     unsigned long *g_pte;
@@ -1873,31 +1820,32 @@ long do_net_update(void)
      * new producer index, but take care not to catch up with our own
      * consumer index.
      */
-    for ( i = shadow_ring->tx_prod;
-          (i != net_ring->tx_prod) &&
-          (((shadow_ring->tx_cons-i) & (TX_RING_SIZE-1)) != 1);
+    j = shadow_ring->tx_prod;
+    for ( i = shadow_ring->tx_req_cons;
+          (i != net_ring->tx_req_prod) &&
+          (((shadow_ring->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
           i = TX_RING_INC(i) )
     {
-        if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
+        if ( copy_from_user(&tx, &net_ring->tx_ring[i].req, sizeof(tx)) )
        {
            DPRINTK("Bad copy_from_user for tx net descriptor\n");
-            shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU;
+            make_tx_response(current_vif, tx.id, RING_STATUS_ERR_CFU);
            continue;
        }

-        shadow_ring->tx_ring[i].size = tx.size;
-        shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE;
-
-        if ( tx.size < PKT_PROT_LEN )
+        if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
        {
-            DPRINTK("Runt packet %d\n", tx.size);
+            DPRINTK("Bad packet size: %d\n", tx.size);
+            make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
            continue;
        }

+        /* No crossing a page boundary as the payload mustn't fragment. */
         if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
         {
             DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
                     tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+            make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
             continue;
         }
 
@@ -1909,6 +1857,7 @@ long do_net_update(void)
         {
             DPRINTK("Bad page frame\n");
             spin_unlock_irq(&current->page_lock);
+            make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
             continue;
         }
 
@@ -1917,45 +1866,61 @@ long do_net_update(void)
         protocol = __constant_htons(
             init_tx_header(g_data, tx.size, the_dev));
         if ( protocol == 0 )
+        {
+            make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
             goto tx_unmap_and_continue;
+        }
 
         target = __net_get_target_vif(g_data, tx.size, current_vif->id);
 
         if ( target > VIF_PHYSICAL_INTERFACE )
         {
             /* Local delivery */
-            if ( (skb = dev_alloc_skb(tx.size)) == NULL )
+            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
+            {
+                make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
                 goto tx_unmap_and_continue;
-
-            skb->destructor = tx_skb_release;
-            get_vif(current_vif);
-
-            shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+            }
 
             skb->src_vif = current_vif->id;
             skb->dst_vif = target;
-            skb->protocol = protocol;
-
+            skb->protocol = protocol;
+
+            /*
+             * We don't need a well-formed skb as netif_rx will fill these
+             * fields in as necessary. All we actually need is the right
+             * page offset in skb->data, and the right length in skb->len.
+             * Note that the correct address/length *excludes* link header.
+             */
             skb->head = (u8 *)map_domain_mem(
                 ((skb->pf - frame_table) << PAGE_SHIFT));
-            skb->data = skb->head + 16;
-            skb_reserve(skb,2);
+            skb->data = skb->head + 18;
             memcpy(skb->data, g_data, tx.size);
-            skb->len = tx.size;
-            unmap_domain_mem(skb->head);
             skb->data += ETH_HLEN;
+            skb->len = tx.size - ETH_HLEN;
+            unmap_domain_mem(skb->head);
+
             (void)netif_rx(skb);
+
+            make_tx_response(current_vif, tx.id, RING_STATUS_OK);
         }
         else if ( target == VIF_PHYSICAL_INTERFACE )
         {
-            shadow_ring->tx_ring[i].header =
+            shadow_ring->tx_ring[j].id     = tx.id;
+            shadow_ring->tx_ring[j].size   = tx.size;
+            shadow_ring->tx_ring[j].status = RING_STATUS_OK;
+            shadow_ring->tx_ring[j].header =
                 kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
-            if ( shadow_ring->tx_ring[i].header == NULL )
+            if ( shadow_ring->tx_ring[j].header == NULL )
+            {
+                make_tx_response(current_vif, tx.id, RING_STATUS_OK);
                 goto tx_unmap_and_continue;
+            }
 
-            memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN);
-            shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN;
-            shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+            memcpy(shadow_ring->tx_ring[j].header, g_data, PKT_PROT_LEN);
+            shadow_ring->tx_ring[j].payload = tx.addr + PKT_PROT_LEN;
             get_page_tot(page);
+            j = TX_RING_INC(j);
         }
 
     tx_unmap_and_continue:
@@ -1963,10 +1928,12 @@ long do_net_update(void)
         unmap_domain_mem(g_data);
         spin_unlock_irq(&current->page_lock);
     }
 
-    if ( shadow_ring->tx_prod != i )
+    shadow_ring->tx_req_cons = i;
+
+    if ( shadow_ring->tx_prod != j )
     {
         smp_mb(); /* Let other CPUs see new descriptors first. */
-        shadow_ring->tx_prod = i;
+        shadow_ring->tx_prod = j;
         add_to_net_schedule_list_tail(current_vif);
         maybe_schedule_tx_action();
     }
@@ -1980,29 +1947,23 @@ long do_net_update(void)
      * new producer index, but take care not to catch up with our own
      * consumer index.
      */
-    for ( i = shadow_ring->rx_prod;
-          (i != net_ring->rx_prod) &&
-          (((shadow_ring->rx_cons-i) & (RX_RING_SIZE-1)) != 1);
+    j = shadow_ring->rx_prod;
+    for ( i = shadow_ring->rx_req_cons;
+          (i != net_ring->rx_req_prod) &&
+          (((shadow_ring->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
           i = RX_RING_INC(i) )
     {
-        /*
-         * This copy assumes that rx_shadow_entry_t is an extension of
-         * rx_net_entry_t extra fields must be tacked on to the end.
-         */
-        if ( copy_from_user(shadow_ring->rx_ring+i, net_ring->rx_ring+i,
-                            sizeof (rx_entry_t) ) )
+        if ( copy_from_user(&rx, &net_ring->rx_ring[i].req, sizeof(rx)) )
         {
-            DPRINTK("Bad copy_from_user for rx ring\n");
-            shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
+            DPRINTK("Bad copy_from_user for rx net descriptor\n");
+            make_rx_response(current_vif,
+                             rx.id, 0, RING_STATUS_ERR_CFU, 0);
             continue;
-        }
+        }
 
-        rx = shadow_ring->rx_ring + i;
-        pfn = rx->addr >> PAGE_SHIFT;
+        pfn = rx.addr >> PAGE_SHIFT;
         page = frame_table + pfn;
 
-        shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
-
         spin_lock_irq(&current->page_lock);
         if ( (pfn >= max_page) ||
              (page->flags != (PGT_l1_page_table | current->domain)) )
@@ -2010,14 +1971,18 @@ long do_net_update(void)
         {
             DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
                     current->domain, pfn, max_page, page->flags);
             spin_unlock_irq(&current->page_lock);
+            make_rx_response(current_vif,
+                             rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             continue;
         }
 
-        g_pte = map_domain_mem(rx->addr);
+        g_pte = map_domain_mem(rx.addr);
 
         if ( !(*g_pte & _PAGE_PRESENT) )
         {
-            DPRINTK("Inavlid PTE passed down (not present)\n");
+            DPRINTK("Invalid PTE passed down (not present)\n");
+            make_rx_response(current_vif,
+                             rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
         }
 
@@ -2027,25 +1992,32 @@ long do_net_update(void)
         {
             DPRINTK("RX page mapped multple times (%d/%d/%08x)\n",
                     page->type_count, page->tot_count, page->flags);
-
+            make_rx_response(current_vif,
+                             rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
         }
 
         /* The pte they passed was good, so take it away from them. */
-        shadow_ring->rx_ring[i].status = RING_STATUS_OK;
         *g_pte &= ~_PAGE_PRESENT;
         page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
-        rx->flush_count = tlb_flush_count[smp_processor_id()];
+        shadow_ring->rx_ring[j].id          = rx.id;
+        shadow_ring->rx_ring[j].addr        = rx.addr;
+        shadow_ring->rx_ring[j].status      = RING_STATUS_OK;
+        shadow_ring->rx_ring[j].flush_count =
+            tlb_flush_count[smp_processor_id()];
+        j = RX_RING_INC(j);
 
     rx_unmap_and_continue:
         unmap_domain_mem(g_pte);
         spin_unlock_irq(&current->page_lock);
     }
 
-    if ( shadow_ring->rx_prod != i )
+    shadow_ring->rx_req_cons = i;
+
+    if ( shadow_ring->rx_prod != j )
     {
         smp_mb(); /* Let other CPUs see new descriptors first. */
-        shadow_ring->rx_prod = i;
+        shadow_ring->rx_prod = j;
     }
 }
 
@@ -2053,6 +2025,58 @@ long do_net_update(void)
 }
 
 
+static void make_tx_response(net_vif_t     *vif,
+                             unsigned long  id,
+                             unsigned char  st)
+{
+    unsigned long flags;
+    net_shadow_ring_t *shadow = vif->shadow_ring;
+    unsigned int pos;
+    tx_resp_entry_t *resp, privresp;
+
+    /* Place on the response ring for the relevant domain. */
+    local_irq_save(flags);
+    pos  = shadow->tx_resp_prod;
+    resp = &vif->net_ring->tx_ring[pos].resp;
+    privresp.id     = id;
+    privresp.status = st;
+    copy_to_user(resp, &privresp, sizeof(privresp));
+    pos = TX_RING_INC(pos);
+    shadow->tx_resp_prod = vif->net_ring->tx_resp_prod = pos;
+    if ( pos == vif->net_ring->rx_event )
+        set_bit(_EVENT_NET_TX, &current->shared_info->events);
+    local_irq_restore(flags);
+}
+
+
+static void make_rx_response(net_vif_t     *vif,
+                             unsigned long  id,
+                             unsigned short size,
+                             unsigned char  st,
+                             unsigned char  off)
+{
+    unsigned long flags;
+    net_shadow_ring_t *shadow = vif->shadow_ring;
+    unsigned int pos;
+    rx_resp_entry_t *resp, privresp;
+
+    /* Place on the response ring for the relevant domain. */
+    local_irq_save(flags);
+    pos  = shadow->rx_resp_prod;
+    resp = &vif->net_ring->rx_ring[pos].resp;
+    privresp.id     = id;
+    privresp.size   = size;
+    privresp.status = st;
+    privresp.offset = off;
+    copy_to_user(resp, &privresp, sizeof(privresp));
+    pos = RX_RING_INC(pos);
+    shadow->rx_resp_prod = vif->net_ring->rx_resp_prod = pos;
+    if ( pos == vif->net_ring->rx_event )
+        set_bit(_EVENT_NET_RX, &current->shared_info->events);
+    local_irq_restore(flags);
+}
+
+
 int setup_network_devices(void)
 {
     int ret;
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
index 01e81e0cf0..3ae5e3d1a0 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
@@ -3,7 +3,7 @@
  *
  * Virtual network driver for XenoLinux.
  *
- * Copyright (c) 2002, K A Fraser
+ * Copyright (c) 2002-2003, K A Fraser
  */
 
 #include <linux/config.h>
@@ -47,21 +47,14 @@ static void cleanup_module(void);
 
 static struct list_head dev_list;
 
-/*
- * RX RING:   RX_IDX <= rx_cons <= rx_prod
- * TX RING:   TX_IDX <= tx_cons <= tx_prod
- * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
- */
 struct net_private
 {
     struct list_head list;
     struct net_device *dev;
 
     struct net_device_stats stats;
-    struct sk_buff **tx_skb_ring;
-    struct sk_buff **rx_skb_ring;
     atomic_t tx_entries;
-    unsigned int rx_idx, tx_idx, tx_full;
+    unsigned int rx_resp_cons, tx_resp_cons, tx_full;
     net_ring_t *net_ring;
     spinlock_t tx_lock;
 };
@@ -71,10 +64,10 @@ static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
 {
     struct net_device *dev = (struct net_device *)dev_id;
     struct net_private *np = dev->priv;
-    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_idx = %d,"
-           " tx_cons = %d, tx_prod = %d, tx_event = %d, state=%d\n",
-           np->tx_full, atomic_read(&np->tx_entries), np->tx_idx,
-           np->net_ring->tx_cons, np->net_ring->tx_prod,
+    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
+           " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
+           np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
+           np->net_ring->tx_req_prod, np->net_ring->tx_resp_prod,
            np->net_ring->tx_event,
           test_bit(__LINK_STATE_XOFF, &dev->state));
 }
@@ -85,29 +78,17 @@ static int network_open(struct net_device *dev)
     struct net_private *np = dev->priv;
     int error = 0;
 
-    np->rx_idx = np->tx_idx = np->tx_full = 0;
-
+    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
     memset(&np->stats, 0, sizeof(np->stats));
-
     spin_lock_init(&np->tx_lock);
-
     atomic_set(&np->tx_entries, 0);
+    memset(np->net_ring, 0, sizeof(*np->net_ring));
 
-    np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
-    np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
-    np->net_ring->tx_ring = NULL;
-    np->net_ring->rx_ring = NULL;
-
-    np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
-                              GFP_KERNEL);
-    np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
-                              GFP_KERNEL);
     np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t),
                                     GFP_KERNEL);
     np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t),
                                     GFP_KERNEL);
-    if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
-         (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
+    if ( (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
     {
         printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name);
         error = -ENOBUFS;
         goto fail;
     }
@@ -156,8 +137,6 @@ static int network_open(struct net_device *dev)
  fail:
     if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
     if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
-    if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
-    if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
     kfree(np);
     return error;
 }
@@ -169,28 +148,29 @@ static void network_tx_buf_gc(struct net_device *dev)
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
     unsigned long flags;
-    unsigned int cons;
+    unsigned int prod;
+    tx_entry_t *tx_ring = np->net_ring->tx_ring;
 
     spin_lock_irqsave(&np->tx_lock, flags);
 
     do {
-        cons = np->net_ring->tx_cons;
+        prod = np->net_ring->tx_resp_prod;
 
-        for ( i = np->tx_idx; i != cons; i = TX_RING_INC(i) )
+        for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
         {
-            skb = np->tx_skb_ring[i];
+            skb = (struct sk_buff *)tx_ring[i].resp.id;
             dev_kfree_skb_any(skb);
             atomic_dec(&np->tx_entries);
         }
 
-        np->tx_idx = i;
+        np->tx_resp_cons = prod;
 
         /* Set a new event, then check for race with update of tx_cons. */
         np->net_ring->tx_event =
-            TX_RING_ADD(cons, (atomic_read(&np->tx_entries)>>1) + 1);
+            TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
 
         smp_mb();
     }
-    while ( cons != np->net_ring->tx_cons );
+    while ( prod != np->net_ring->tx_resp_prod );
 
     if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
     {
@@ -201,21 +181,13 @@ static void network_tx_buf_gc(struct net_device *dev)
     spin_unlock_irqrestore(&np->tx_lock, flags);
 }
 
-inline unsigned long get_ppte(unsigned long addr)
+inline pte_t *get_ppte(void *addr)
 {
-    unsigned long ppte;
-    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
-    pgd = pgd_offset_k(addr);
-
-    if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
-
-    pmd = pmd_offset(pgd, addr);
-    if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
-
-    ptep = pte_offset(pmd, addr);
-    ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep));
-
-    return ppte;
+    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+    pgd = pgd_offset_k(   (unsigned long)addr);
+    pmd = pmd_offset(pgd, (unsigned long)addr);
+    pte = pte_offset(pmd, (unsigned long)addr);
+    return pte;
 }
 
 static void network_alloc_rx_buffers(struct net_device *dev)
@@ -223,21 +195,21 @@ static void network_alloc_rx_buffers(struct net_device *dev)
     unsigned int i;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
-    unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);
+    unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);
 
-    for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
+    for ( i = np->net_ring->rx_req_prod; i != end; i = RX_RING_INC(i) )
     {
         skb = dev_alloc_skb(RX_BUF_SIZE);
         if ( skb == NULL ) break;
         skb->dev = dev;
-        np->rx_skb_ring[i] = skb;
-        np->net_ring->rx_ring[i].addr = get_ppte((unsigned long)skb->head);
-        np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
+        np->net_ring->rx_ring[i].req.id   = (unsigned long)skb;
+        np->net_ring->rx_ring[i].req.addr =
+            virt_to_machine(get_ppte(skb->head));
     }
 
-    np->net_ring->rx_prod = i;
+    np->net_ring->rx_req_prod = i;
 
-    np->net_ring->rx_event = RX_RING_INC(np->rx_idx);
+    np->net_ring->rx_event = RX_RING_INC(np->rx_resp_cons);
 
     /*
      * We may have allocated buffers which have entries outstanding in
@@ -254,9 +226,11 @@ static void network_free_rx_buffers(struct net_device *dev)
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
 
-    for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) )
+    for ( i = np->rx_resp_cons;
+          i != np->net_ring->rx_req_prod;
+          i = RX_RING_INC(i) )
     {
-        skb = np->rx_skb_ring[i];
+        skb = (struct sk_buff *)np->net_ring->rx_ring[i].req.id;
         dev_kfree_skb_any(skb);
     }
 }
@@ -272,7 +246,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
         netif_stop_queue(dev);
         return -ENOBUFS;
     }
-    i = np->net_ring->tx_prod;
+    i = np->net_ring->tx_req_prod;
 
     if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
     {
@@ -284,11 +258,11 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
         skb = new_skb;
     }
 
-    np->tx_skb_ring[i] = skb;
-    np->net_ring->tx_ring[i].addr =
-        (unsigned long)phys_to_machine(virt_to_phys(skb->data));
-    np->net_ring->tx_ring[i].size = skb->len;
-    np->net_ring->tx_prod = TX_RING_INC(i);
+    np->net_ring->tx_ring[i].req.id   = (unsigned long)skb;
+    np->net_ring->tx_ring[i].req.addr =
+        phys_to_machine(virt_to_phys(skb->data));
+    np->net_ring->tx_ring[i].req.size = skb->len;
+    np->net_ring->tx_req_prod = TX_RING_INC(i);
     atomic_inc(&np->tx_entries);
 
     np->stats.tx_bytes += skb->len;
@@ -316,13 +290,15 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
     struct net_device *dev = (struct net_device *)dev_id;
     struct net_private *np = dev->priv;
     struct sk_buff *skb;
-    rx_entry_t *rx;
+    rx_resp_entry_t *rx;
 
  again:
-    for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
+    for ( i = np->rx_resp_cons;
+          i != np->net_ring->rx_resp_prod;
+          i = RX_RING_INC(i) )
     {
-        rx = &np->net_ring->rx_ring[i];
-        skb = np->rx_skb_ring[i];
+        rx  = &np->net_ring->rx_ring[i].resp;
+        skb = (struct sk_buff *)rx->id;
 
         if ( rx->status != RING_STATUS_OK )
         {
@@ -341,8 +317,7 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
         skb_shinfo(skb)->frag_list = NULL;
 
         phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
-            (*(unsigned long *)phys_to_virt(machine_to_phys(rx->addr))
-             ) >> PAGE_SHIFT;
+            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
 
         if ( rx->offset < 16 )
         {
@@ -353,23 +328,23 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
 
         skb_reserve(skb, rx->offset - 16);
 
-        skb_put(skb, np->net_ring->rx_ring[i].size);
+        skb_put(skb, rx->size);
         skb->protocol = eth_type_trans(skb, dev);
 
         np->stats.rx_packets++;
-        np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
+        np->stats.rx_bytes += rx->size;
 
         netif_rx(skb);
         dev->last_rx = jiffies;
     }
 
-    np->rx_idx = i;
+    np->rx_resp_cons = i;
 
     network_alloc_rx_buffers(dev);
 
     /* Deal with hypervisor racing our resetting of rx_event. */
     smp_mb();
-    if ( np->net_ring->rx_cons != i ) goto again;
+    if ( np->net_ring->rx_resp_prod != i ) goto again;
 }
 
@@ -382,8 +357,6 @@ static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
 
 int network_close(struct net_device *dev)
 {
-    struct net_private *np = dev->priv;
-
     netif_stop_queue(dev);
 
     free_irq(NET_RX_IRQ, dev);
@@ -401,9 +374,6 @@ int network_close(struct net_device *dev)
     kfree(np->net_ring->tx_ring);
 #endif
 
-    kfree(np->rx_skb_ring);
-    kfree(np->tx_skb_ring);
-
     MOD_DEC_USE_COUNT;
 
     return 0;
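
The core of this merge is item 1 of the old xen/TODO: replacing in-place status rings with paired request/response descriptors matched by an opaque id, so that responses can be queued in a different order to requests. The following stand-alone C program is a toy model of that protocol, not part of the Xen sources -- all demo_* names are illustrative, ring slots are reused for responses as in make_tx_response()/make_rx_response() above, and locking, event delivery, and memory barriers are omitted.

    #include <stdio.h>

    /*
     * Toy model of the id-tagged request/response rings introduced above.
     * Ring sizes are powers of two so indices wrap with a cheap mask, as
     * TX_RING_INC()/RX_RING_INC() do in xen/net/dev.c.
     */
    #define DEMO_RING_SIZE 8
    #define DEMO_RING_MASK (DEMO_RING_SIZE - 1)
    #define DEMO_RING_INC(_i) (((_i) + 1) & DEMO_RING_MASK)

    typedef struct { unsigned long id; unsigned short size; } demo_req_t;
    typedef struct { unsigned long id; unsigned char status; } demo_resp_t;
    typedef union  { demo_req_t req; demo_resp_t resp; } demo_entry_t;

    typedef struct {
        demo_entry_t ring[DEMO_RING_SIZE];
        unsigned int req_prod;   /* advanced by the requester (guest)      */
        unsigned int resp_prod;  /* advanced by the responder (hypervisor) */
    } demo_ring_t;

    /* Requester side: post a request tagged with an opaque id. */
    static void demo_post_request(demo_ring_t *r, unsigned long id,
                                  unsigned short size)
    {
        demo_entry_t *e = &r->ring[r->req_prod];
        e->req.id   = id;
        e->req.size = size;
        r->req_prod = DEMO_RING_INC(r->req_prod);
    }

    /* Responder side: consume requests and write responses back into the
     * ring. Because each response carries the request's id, the requester
     * can match them even if they complete out of order. */
    static unsigned int demo_service(demo_ring_t *r, unsigned int req_cons)
    {
        while ( req_cons != r->req_prod )
        {
            demo_req_t req = r->ring[req_cons].req; /* copy before reuse */
            demo_entry_t *e;
            req_cons = DEMO_RING_INC(req_cons);

            e = &r->ring[r->resp_prod];             /* slot is reused    */
            e->resp.id     = req.id;
            e->resp.status = 0;                     /* cf. RING_STATUS_OK */
            r->resp_prod   = DEMO_RING_INC(r->resp_prod);
        }
        return req_cons;
    }

    int main(void)
    {
        demo_ring_t r = { .req_prod = 0, .resp_prod = 0 };
        unsigned int req_cons = 0, resp_cons = 0;

        demo_post_request(&r, 0x1001, 64);
        demo_post_request(&r, 0x1002, 128);
        req_cons = demo_service(&r, req_cons);

        while ( resp_cons != r.resp_prod )
        {
            demo_resp_t *resp = &r.ring[resp_cons].resp;
            printf("response for request id %#lx, status %u\n",
                   resp->id, (unsigned int)resp->status);
            resp_cons = DEMO_RING_INC(resp_cons);
        }
        return 0;
    }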
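A design choice worth noting in the XenoLinux driver changes above: the guest uses the sk_buff pointer itself, cast to unsigned long, as the request id, and the response hands the same value straight back. That is what allows the old private tx_skb_ring/rx_skb_ring bookkeeping arrays to be deleted -- tx_ring[i].resp.id is exactly the buffer to free. This works because the id is fully opaque to Xen and a pointer fits in an unsigned long on i386, the only target here.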