author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2003-04-17 17:12:21 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2003-04-17 17:12:21 +0000
commit     2ccb44d10f5c07368a4191d59a7f413d20872acc (patch)
tree       2db8eb5cdd47f9fcf766485c7a8e444cd18a401e
parent     2c6f432e14345aacd41430870a7fc7fc68f8f8a2 (diff)
bitkeeper revision 1.179.1.1 (3e9ee075wJmtFBkJEk-QAC5VB7htXg)
network.c, dev.c, vif.h, sched.h, network.h, TODO: Fixed network rings so we can have out-of-order responses. This made it possible to fix local packet delivery. However, the virtual firewall/router stuff needs urgent redesigning.
-rw-r--r--  xen/TODO                                                           |  48
-rw-r--r--  xen/common/network.c                                               |  31
-rw-r--r--  xen/include/hypervisor-ifs/network.h                               |  80
-rw-r--r--  xen/include/xeno/sched.h                                           |   2
-rw-r--r--  xen/include/xeno/vif.h                                             |  50
-rw-r--r--  xen/net/dev.c                                                      | 388
-rw-r--r--  xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c   | 132
7 files changed, 368 insertions(+), 363 deletions(-)
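
The core of this change, distilled: each ring entry becomes a union of a
request and a response carrying an opaque id, so the hypervisor may retire
requests in any order and the guest matches completions by id rather than by
ring position. A minimal stand-alone sketch of the idea follows (names are
illustrative; the real types are the tx/rx req/resp unions added to
network.h below):

#include <stdio.h>

#define RING_SIZE 8                     /* power of two, like TX/RX_RING_SIZE */
#define RING_INC(_i) (((_i) + 1) & (RING_SIZE - 1))

typedef struct { unsigned long id, addr; unsigned short size; } req_t;
typedef struct { unsigned long id; unsigned char status; } resp_t;
typedef union { req_t req; resp_t resp; } ring_entry_t;

static ring_entry_t ring[RING_SIZE];
static unsigned int req_prod, resp_prod;   /* consumer indexes stay private */

int main(void)
{
    unsigned int i;
    unsigned long done_order[3] = { 101, 102, 100 };  /* out of order! */

    /* Guest queues three requests; the id is opaque to the hypervisor. */
    for ( i = 0; i < 3; i++ )
    {
        ring[req_prod].req.id = 100 + i;
        req_prod = RING_INC(req_prod);
    }

    /* Hypervisor overwrites already-consumed request slots with responses. */
    for ( i = 0; i < 3; i++ )
    {
        ring[resp_prod].resp.id = done_order[i];
        ring[resp_prod].resp.status = 0;              /* RING_STATUS_OK */
        resp_prod = RING_INC(resp_prod);
    }

    /* Guest matches each response to its request purely by id. */
    for ( i = 0; i < 3; i++ )
        printf("response %u completes request id %lu\n", i, ring[i].resp.id);
    return 0;
}
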
diff --git a/xen/TODO b/xen/TODO
index 9f834dd958..e5df50ac5b 100644
--- a/xen/TODO
+++ b/xen/TODO
@@ -7,20 +7,7 @@ longer-term goals.
-- Keir (16/3/03)
-1. FIX HANDLING OF NETWORK RINGS
---------------------------------
-Handling of the transmit rings is currently very broken (for example,
-sending an inter-domain packet will wedge the hypervisor). This is
-because we may handle packets out of order (eg. inter-domain packets
-are handled eagerly, while packets for real interfaces are queued),
-but our current ring design really assumes in-order handling.
-
-A neat fix will be to allow responses to be queued in a different
-order to requests, just as we already do with block-device
-rings. We'll need to add an opaque identifier to ring entries,
-allowing matching of requests and responses, but that's about it.
-
-2. ACCURATE TIMERS AND WALL-CLOCK TIME
+1. ACCURATE TIMERS AND WALL-CLOCK TIME
--------------------------------------
Currently our long-term timebase free runs on CPU0, with no external
calibration. We should run ntpd on domain 0 and allow this to warp
@@ -28,7 +15,7 @@ Xen's timebase. Once this is done, we can have a timebase per CPU and
not worry about relative drift (since they'll all get sync'ed
periodically by ntp).
-3. ASSIGNING DOMAINS TO PROCESSORS
+2. ASSIGNING DOMAINS TO PROCESSORS
----------------------------------
More intelligent assignment of domains to processors. In
particular, we don't play well with hyperthreading: we will assign
@@ -40,17 +27,17 @@ relationships between processors in the system (eg. which ones are
siblings in the same package). We then use this to balance domains
across packages, and across virtual processors within a package.
-4. PROPER DESTRUCTION OF DOMAINS
---------------------------------
-Currently we do not free resources when destroying a domain. This is
-because they may be tied up in subsystems, and there is no way of
-pulling them back in a safe manner.
+3. DOMAIN 0 MANAGEMENT DAEMON
+-----------------------------
+A better control daemon is required for domain 0, which keeps proper
+track of machine resources and can make sensible policy choices. This
+may require support in Xen; for example, notifications (eg. DOMn is
+killed), and requests (eg. can DOMn allocate x frames of memory?).
-The fix is probably to reference count resources and automatically
-free them when the count reaches zero. We may get away with one count
-per domain (for all its resources). When this reaches zero we know it
-is safe to free everything: block-device rings, network rings, and all
-the rest.
+4. SANE NETWORK ROUTING
+-----------------------
+The current virtual firewall/router is completely broken. Needs a new
+design and implementation!
5. NETWORK CHECKSUM OFFLOAD
---------------------------
@@ -60,14 +47,7 @@ indicate, on transmit, which packets need the checksum added and, on
receive, which packets have been checked out as okay. We can steal
Linux's interface, which is entirely sane given NIC limitations.
-6. DOMAIN 0 MANAGEMENT DAEMON
------------------------------
-A better control daemon is required for domain 0, which keeps proper
-track of machine resources and can make sensible policy choices. This
-may require support in Xen; for example, notifications (eg. DOMn is
-killed), and requests (eg. can DOMn allocate x frames of memory?).
-
-7. MODULE SUPPORT FOR XEN
+6. MODULE SUPPORT FOR XEN
-------------------------
Network and blkdev drivers are bloating Xen. At some point we want to
build drivers as modules, stick them in a cheesy ramfs, then relocate
@@ -79,7 +59,7 @@ which drivers to load.
Most of the hard stuff (relocating and the like) is done for us by
Linux's module system.
-8. NEW DESIGN FEATURES
+7. NEW DESIGN FEATURES
----------------------
This includes the last-chance page cache, and the unified buffer cache.
diff --git a/xen/common/network.c b/xen/common/network.c
index 9e1bf7a00b..2cdf11a9a5 100644
--- a/xen/common/network.c
+++ b/xen/common/network.c
@@ -5,7 +5,7 @@
* with the virtual interfaces (vifs) and the virtual firewall/router through
* the use of rules.
*
- * Copyright (c) 2002, A K Warfield and K A Fraser
+ * Copyright (c) 2002-2003, A K Warfield and K A Fraser
*/
#include <hypervisor-ifs/network.h>
@@ -67,7 +67,8 @@ net_vif_t *create_net_vif(int domain)
shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
if ( shadow_ring == NULL ) goto fail;
-
+ memset(shadow_ring, 0, sizeof(*shadow_ring));
+
shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
* sizeof(rx_shadow_entry_t), GFP_KERNEL);
shadow_ring->tx_ring = kmalloc(TX_RING_SIZE
@@ -75,9 +76,6 @@ net_vif_t *create_net_vif(int domain)
if ( (shadow_ring->rx_ring == NULL) || (shadow_ring->tx_ring == NULL) )
goto fail;
- shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
- shadow_ring->tx_prod = shadow_ring->tx_cons = shadow_ring->tx_idx = 0;
-
/*
* Fill in the new vif struct. Note that, while the vif's refcnt is
* non-zero, we hold a reference to the task structure.
@@ -121,7 +119,7 @@ void destroy_net_vif(net_vif_t *vif)
/* Return any outstanding receive buffers to the guest OS. */
spin_lock_irqsave(&p->page_lock, flags);
for ( i = vif->shadow_ring->rx_idx;
- i != vif->shadow_ring->rx_prod;
+ i != vif->shadow_ring->rx_req_cons;
i = ((i+1) & (RX_RING_SIZE-1)) )
{
rx_shadow_entry_t *rx = vif->shadow_ring->rx_ring + i;
@@ -263,7 +261,7 @@ void add_default_net_rule(int vif_id, u32 ipaddr)
memset(&new_rule, 0, sizeof(net_rule_t));
new_rule.dst_addr = ipaddr;
new_rule.dst_addr_mask = 0xffffffff;
- new_rule.src_interface = VIF_PHYSICAL_INTERFACE;
+ new_rule.src_interface = VIF_ANY_INTERFACE;
new_rule.dst_interface = vif_id;
new_rule.action = NETWORK_ACTION_ACCEPT;
new_rule.proto = NETWORK_PROTO_ANY;
@@ -319,9 +317,8 @@ void print_net_rule_list()
* Apply the rules to this skbuff and return the vif id that it is bound for.
* If there is no match, VIF_DROP is returned.
*/
-
-int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port, u16 dst_port,
- int src_vif)
+int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr,
+ u16 src_port, u16 dst_port, int src_vif)
{
net_rule_ent_t *ent;
int dest = VIF_DROP;
@@ -330,7 +327,7 @@ int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port
ent = net_rule_list;
- while (ent)
+ while ( ent != NULL )
{
if ( ((ent->r.src_interface == src_vif)
|| (ent->r.src_interface == VIF_ANY_INTERFACE)) &&
@@ -351,12 +348,19 @@ int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port
(tproto == IPPROTO_UDP)))
)
{
- break;
+ /*
+             * XXX FFS! We keep going to find the "best" rule, where best
+             * corresponds to vaguely sane routing of a packet. We need a less
+             * shafted model for our "virtual firewall/router" methinks!
+ */
+ if ( dest < 0 )
+ dest = ent->r.dst_interface;
+ if ( dest >= 0 )
+ break;
}
ent = ent->next;
}
- if (ent) (dest = ent->r.dst_interface);
read_unlock(&net_rule_lock);
return dest;
}
@@ -423,6 +427,7 @@ int __net_get_target_vif(u8 *data, unsigned int len, int src_vif)
return target;
drop:
+ printk("VIF%d: pkt to drop!\n", src_vif);
return VIF_DROP;
}
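
The XXX comment above is worth unpacking: the loop now remembers the first
matching rule's destination but only stops once it finds a match with a real
(non-negative) vif, so a matching drop rule no longer shadows a usable route
later in the list. A stand-alone illustration of just that logic (the
VIF_DROP value here is a stand-in, not the real constant):

#include <stdio.h>

#define VIF_DROP (-3)                    /* illustrative value */

struct rule { int matches; int dst_interface; };

static int find_dest(const struct rule *rules, int n)
{
    int dest = VIF_DROP;
    for ( int i = 0; i < n; i++ )
    {
        if ( !rules[i].matches )
            continue;
        if ( dest < 0 )
            dest = rules[i].dst_interface;   /* remember first match */
        if ( dest >= 0 )
            break;                           /* stop at first usable one */
    }
    return dest;
}

int main(void)
{
    struct rule rules[] = {
        { 1, VIF_DROP },   /* matches, but routes nowhere */
        { 0, 7 },          /* does not match */
        { 1, 2 },          /* matches and routes to vif 2 */
    };
    printf("dest = %d\n", find_dest(rules, 3));   /* prints 2 */
    return 0;
}
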
diff --git a/xen/include/hypervisor-ifs/network.h b/xen/include/hypervisor-ifs/network.h
index 56a8f92881..4d4cfe93e6 100644
--- a/xen/include/hypervisor-ifs/network.h
+++ b/xen/include/hypervisor-ifs/network.h
@@ -14,50 +14,70 @@
#include <linux/types.h>
-typedef struct tx_entry_st {
- unsigned long addr; /* machine address of packet (IN VAR) */
- unsigned short size; /* in bytes (IN VAR) */
- unsigned char status; /* per descriptor status (OUT VAR) */
- unsigned char _unused;
+
+typedef struct tx_req_entry_st
+{
+ unsigned long id;
+ unsigned long addr; /* machine address of packet */
+ unsigned short size; /* packet size in bytes */
+} tx_req_entry_t;
+
+typedef struct tx_resp_entry_st
+{
+ unsigned long id;
+ unsigned char status;
+} tx_resp_entry_t;
+
+typedef union tx_entry_st
+{
+ tx_req_entry_t req;
+ tx_resp_entry_t resp;
} tx_entry_t;
-typedef struct rx_entry_st {
- unsigned long addr; /* machine address of PTE to swizzle (IN VAR) */
- unsigned short size; /* in bytes (OUT VAR) */
- unsigned char status; /* per descriptor status (OUT VAR) */
- unsigned char offset; /* offset in page of received pkt (OUT VAR) */
+
+typedef struct rx_req_entry_st
+{
+ unsigned long id;
+ unsigned long addr; /* machine address of PTE to swizzle */
+} rx_req_entry_t;
+
+typedef struct rx_resp_entry_st
+{
+ unsigned long id;
+ unsigned short size; /* received packet size in bytes */
+ unsigned char status; /* per descriptor status */
+ unsigned char offset; /* offset in page of received pkt */
+} rx_resp_entry_t;
+
+typedef union rx_entry_st
+{
+ rx_req_entry_t req;
+ rx_resp_entry_t resp;
} rx_entry_t;
+
#define TX_RING_SIZE 256
#define RX_RING_SIZE 256
-typedef struct net_ring_st {
+
+typedef struct net_ring_st
+{
/*
- * Guest OS places packets into ring at tx_prod.
- * Hypervisor removes at tx_cons.
- * Ring is empty when tx_prod == tx_cons.
- * Guest OS receives a DOMAIN_EVENT_NET_TX when tx_cons passes tx_event.
- * Hypervisor may be prodded whenever tx_prod is updated, but this is
- * only necessary when tx_cons == old_tx_prod (ie. transmitter stalled).
+ * Guest OS places packets into ring at tx_req_prod.
+ * Guest OS receives DOMAIN_EVENT_NET_TX when tx_resp_prod passes tx_event.
*/
tx_entry_t *tx_ring;
- unsigned int tx_prod, tx_cons, tx_event;
+ unsigned int tx_req_prod, tx_resp_prod, tx_event;
/*
- * Guest OS places empty buffers into ring at rx_prod.
- * Hypervisor fills buffers as rx_cons.
- * Ring is empty when rx_prod == rx_cons.
- * Guest OS receives a DOMAIN_EVENT_NET_RX when rx_cons passes rx_event.
- * Hypervisor may be prodded whenever rx_prod is updated, but this is
- * only necessary when rx_cons == old_rx_prod (ie. receiver stalled).
+ * Guest OS places empty buffers into ring at rx_req_prod.
+     * Guest OS receives DOMAIN_EVENT_NET_RX when rx_resp_prod passes rx_event.
*/
rx_entry_t *rx_ring;
- unsigned int rx_prod, rx_cons, rx_event;
+ unsigned int rx_req_prod, rx_resp_prod, rx_event;
} net_ring_t;
-/* Specify base of per-domain array. Get returned free slot in the array. */
-/*net_ring_t *create_net_vif(int domain);*/
-
-/* Packet routing/filtering code follows:
+/*
+ * Packet routing/filtering code follows:
*/
#define NETWORK_ACTION_ACCEPT 0
@@ -89,7 +109,7 @@ typedef struct net_rule_st
typedef struct vif_query_st
{
unsigned int domain;
- char *buf; // where to put the reply -- guest virtual address
+ char *buf; /* reply buffer -- guest virtual address */
} vif_query_t;
/* Network trap operations and associated structure.
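
One payoff of the opaque id, visible in the XenoLinux driver changes at the
bottom of this patch: the guest stores the sk_buff pointer itself as the
request id, and the response hands it straight back, eliminating the private
tx_skb_ring/rx_skb_ring lookup tables. A userspace sketch of the same trick,
with a malloc'd buffer standing in for the skb:

#include <stdio.h>
#include <stdlib.h>

#define RING_SIZE 8
#define RING_INC(_i) (((_i) + 1) & (RING_SIZE - 1))

typedef struct { unsigned long id, addr; } req_t;
typedef struct { unsigned long id; unsigned char status; } resp_t;
typedef union { req_t req; resp_t resp; } entry_t;

static entry_t ring[RING_SIZE];
static unsigned int req_prod, resp_prod, resp_cons;

struct buf { char data[64]; };            /* stand-in for struct sk_buff */

int main(void)
{
    /* Produce: the buffer pointer itself is the opaque id. */
    struct buf *b = malloc(sizeof(*b));
    ring[req_prod].req.id = (unsigned long)b;
    req_prod = RING_INC(req_prod);

    /* "Hypervisor": consume the request, later write a response there. */
    unsigned long id = ring[resp_prod].req.id;
    ring[resp_prod].resp.id = id;
    ring[resp_prod].resp.status = 0;      /* RING_STATUS_OK */
    resp_prod = RING_INC(resp_prod);

    /* Consume: recover the buffer straight from the id and free it. */
    while ( resp_cons != resp_prod )
    {
        free((struct buf *)ring[resp_cons].resp.id);
        resp_cons = RING_INC(resp_cons);
    }
    printf("garbage-collected %u buffer(s)\n", resp_cons);
    return 0;
}
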
diff --git a/xen/include/xeno/sched.h b/xen/include/xeno/sched.h
index 5534ee45f3..3bc997bed3 100644
--- a/xen/include/xeno/sched.h
+++ b/xen/include/xeno/sched.h
@@ -50,7 +50,7 @@ extern struct mm_struct init_mm;
}
#define _HYP_EVENT_NEED_RESCHED 0
-#define _HYP_EVENT_NET_RX 1
+#define _HYP_EVENT_NET 1
#define _HYP_EVENT_DIE 2
#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
diff --git a/xen/include/xeno/vif.h b/xen/include/xeno/vif.h
index 22c6c25392..730c1cb084 100644
--- a/xen/include/xeno/vif.h
+++ b/xen/include/xeno/vif.h
@@ -3,7 +3,7 @@
* This is the hypervisor end of the network code. The net_ring structure
* stored in each vif is placed on a shared page to interact with the guest VM.
*
- * Copyright (c) 2002, A K Warfield and K A Fraser
+ * Copyright (c) 2002-2003, A K Warfield and K A Fraser
*/
/* virtual network interface struct and associated defines. */
@@ -25,45 +25,51 @@
* TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
*/
-typedef struct rx_shadow_entry_st {
+typedef struct rx_shadow_entry_st
+{
+ unsigned long id;
+ /* IN vars */
unsigned long addr;
+ /* OUT vars */
unsigned short size;
unsigned char status;
unsigned char offset;
+ /* PRIVATE vars */
unsigned long flush_count;
} rx_shadow_entry_t;
-typedef struct tx_shadow_entry_st {
+typedef struct tx_shadow_entry_st
+{
+ unsigned long id;
+ /* IN vars */
void *header;
unsigned long payload;
unsigned short size;
+ /* OUT vars */
unsigned char status;
- unsigned char _unused;
} tx_shadow_entry_t;
typedef struct net_shadow_ring_st {
rx_shadow_entry_t *rx_ring;
- tx_shadow_entry_t *tx_ring;
-
- /*
- * Private copy of producer. Follows guest OS version, but never
- * catches up with our consumer index.
- */
- unsigned int rx_prod;
- /* Points at next buffer to be filled by NIC. Chases rx_prod. */
- unsigned int rx_idx;
- /* Points at next buffer to be returned to the guest OS. Chases rx_idx. */
- unsigned int rx_cons;
+ unsigned int rx_prod; /* More buffers for filling go here. */
+ unsigned int rx_idx; /* Next buffer to fill is here. */
+ unsigned int rx_cons; /* Next buffer to create response for is here. */
+ tx_shadow_entry_t *tx_ring;
/*
- * Private copy of producer. Follows guest OS version, but never
- * catches up with our consumer index.
+ * These cannot be derived from shared variables, as not all packets
+ * will end up on the shadow ring (eg. locally delivered packets).
*/
- unsigned int tx_prod;
- /* Points at next buffer to be scheduled. Chases tx_prod. */
- unsigned int tx_idx;
- /* Points at next buffer to be returned to the guest OS. Chases tx_idx. */
- unsigned int tx_cons;
+ unsigned int tx_prod; /* More packets for sending go here. */
+ unsigned int tx_idx; /* Next packet to send is here. */
+ unsigned int tx_transmitted_prod; /* Next packet to finish transmission. */
+ unsigned int tx_cons; /* Next packet to create response for is here. */
+
+ /* Indexes into shared ring. */
+ unsigned int rx_req_cons;
+ unsigned int rx_resp_prod; /* private version of shared variable */
+ unsigned int tx_req_cons;
+ unsigned int tx_resp_prod; /* private version of shared variable */
} net_shadow_ring_t;
typedef struct net_vif_st {
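
A note on the index arithmetic used by do_net_update below: with
power-of-two rings, the distance between two indexes is a mask away, and the
request-consuming loops run while ((resp_prod - i) & (SIZE-1)) != 1, i.e.
they stop one slot before lapping the response producer so pending responses
are never overwritten. A small sketch of that invariant, assuming the
TX_RING_* macros from dev.c:

#include <assert.h>
#include <stdio.h>

#define TX_RING_SIZE 256
#define TX_RING_INC(_i)    (((_i) + 1) & (TX_RING_SIZE - 1))
#define TX_RING_ADD(_i,_j) (((_i) + (_j)) & (TX_RING_SIZE - 1))

int main(void)
{
    unsigned int resp_prod = 10;   /* next slot the response producer fills */
    unsigned int i = TX_RING_ADD(resp_prod, TX_RING_SIZE - 2);

    /* One more step would leave exactly one slot of slack... */
    assert(((resp_prod - i) & (TX_RING_SIZE - 1)) == 2);
    i = TX_RING_INC(i);
    /* ...and here the do_net_update loop condition stops the walk. */
    assert(((resp_prod - i) & (TX_RING_SIZE - 1)) == 1);

    printf("stopped at slot %u, one short of resp_prod %u\n", i, resp_prod);
    return 0;
}
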
diff --git a/xen/net/dev.c b/xen/net/dev.c
index cd9c2d18ee..7fbf165b0e 100644
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -49,6 +49,15 @@
#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
+static void make_tx_response(net_vif_t *vif,
+ unsigned long id,
+ unsigned char st);
+static void make_rx_response(net_vif_t *vif,
+ unsigned long id,
+ unsigned short size,
+ unsigned char st,
+ unsigned char off);
+
struct net_device *the_dev = NULL;
/*
@@ -482,6 +491,49 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb)
struct netif_rx_stats netdev_rx_stat[NR_CPUS];
+/*
+ * update_shared_ring(void)
+ *
+ * This replaces flush_rx_queue as the guest event handler to move packets
+ * queued in the guest ring up to the guest. Really, the packet is already
+ * there, it was page flipped in deliver_packet, but this moves the ring
+ * descriptor across from the shadow ring and increments the pointers.
+ */
+void update_shared_ring(void)
+{
+ rx_shadow_entry_t *rx;
+ tx_shadow_entry_t *tx;
+ net_ring_t *net_ring;
+ net_shadow_ring_t *shadow_ring;
+ net_vif_t *vif;
+ struct list_head *ent;
+
+ clear_bit(_HYP_EVENT_NET, &current->hyp_events);
+
+ list_for_each(ent, &current->net_vifs)
+ {
+ vif = list_entry(ent, net_vif_t, dom_list);
+ net_ring = vif->net_ring;
+ shadow_ring = vif->shadow_ring;
+
+ while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
+ {
+ rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
+ if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
+ __flush_tlb();
+ shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
+ make_rx_response(vif, rx->id, rx->size, rx->status, rx->offset);
+ }
+
+ while ( shadow_ring->tx_cons != shadow_ring->tx_transmitted_prod )
+ {
+ tx = shadow_ring->tx_ring + shadow_ring->tx_cons;
+            shadow_ring->tx_cons = TX_RING_INC(shadow_ring->tx_cons);
+ make_tx_response(vif, tx->id, tx->status);
+ }
+ }
+}
+
void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
{
net_shadow_ring_t *shadow_ring;
@@ -489,7 +541,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
unsigned long *g_pte;
struct pfn_info *g_pfn, *h_pfn;
unsigned int i;
- unsigned long flags;
memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
@@ -501,17 +552,13 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
rx = shadow_ring->rx_ring + i;
- if ( rx->status != RING_STATUS_OK )
- {
- DPRINTK("Bad buffer in deliver_packet()\n");
- goto inc_and_out;
- }
-
+ ASSERT(rx->status == RING_STATUS_OK);
ASSERT(skb->len <= PAGE_SIZE);
+
rx->size = skb->len;
rx->offset = (unsigned char)((unsigned long)skb->data & ~PAGE_MASK);
- spin_lock_irqsave(&vif->domain->page_lock, flags);
+ spin_lock(&vif->domain->page_lock);
g_pte = map_domain_mem(rx->addr);
@@ -541,12 +588,11 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
list_del(&g_pfn->list);
list_add(&h_pfn->list, &vif->domain->pg_head);
- spin_unlock_irqrestore(&vif->domain->page_lock, flags);
+ spin_unlock(&vif->domain->page_lock);
/* Our skbuff now points at the guest's old frame. */
skb->pf = g_pfn;
- inc_and_out:
smp_wmb(); /* updates must happen before releasing the descriptor. */
shadow_ring->rx_idx = RX_RING_INC(i);
}
@@ -595,11 +641,11 @@ int netif_rx(struct sk_buff *skb)
if ( skb->dst_vif == VIF_UNKNOWN_INTERFACE )
skb->dst_vif = __net_get_target_vif(skb->data, skb->len, skb->src_vif);
- read_lock_irqsave(&sys_vif_lock, flags);
+ read_lock(&sys_vif_lock);
if ( (skb->dst_vif <= VIF_PHYSICAL_INTERFACE) ||
((vif = sys_vif_list[skb->dst_vif]) == NULL) )
{
- read_unlock_irqrestore(&sys_vif_lock, flags);
+ read_unlock(&sys_vif_lock);
netdev_rx_stat[this_cpu].dropped++;
unmap_domain_mem(skb->head);
kfree_skb(skb);
@@ -608,10 +654,10 @@ int netif_rx(struct sk_buff *skb)
}
get_vif(vif);
- read_unlock_irqrestore(&sys_vif_lock, flags);
+ read_unlock(&sys_vif_lock);
deliver_packet(skb, vif);
- cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET_RX);
+ cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
put_vif(vif);
unmap_domain_mem(skb->head);
@@ -676,10 +722,8 @@ static void add_to_net_schedule_list_tail(net_vif_t *vif)
/* Destructor function for tx skbs. */
static void tx_skb_release(struct sk_buff *skb)
{
- int i, send = 0;
+ int i;
net_vif_t *vif = sys_vif_list[skb->src_vif];
- unsigned int idx;
- tx_shadow_entry_t *tx;
unsigned long cpu_mask, flags;
spin_lock_irqsave(&vif->domain->page_lock, flags);
@@ -692,51 +736,10 @@ static void tx_skb_release(struct sk_buff *skb)
skb_shinfo(skb)->nr_frags = 0;
- /* This would mean that the guest OS has fiddled with our index. */
- if ( vif->shadow_ring->tx_cons != vif->net_ring->tx_cons )
- DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
- vif->shadow_ring->tx_cons, vif->net_ring->tx_cons);
-
- /*
- * XXX This assumes that, per vif, SKBs are processed in-order!
- * Also assumes no concurrency. This is safe because each vif
- * maps to one NIC. This is executed in NIC interrupt code, so we have
- * mutual exclusion from do_IRQ().
- */
-
- smp_wmb(); /* make sure any status updates occur before inc'ing tx_cons. */
-
- /* Skip over a sequence of bad descriptors, plus the first good one. */
- do {
- idx = vif->shadow_ring->tx_cons;
- /* There must be at least one good descriptor outstanding. */
- if ( idx == vif->shadow_ring->tx_idx ) BUG();
- tx = &vif->shadow_ring->tx_ring[idx];
- vif->shadow_ring->tx_cons = TX_RING_INC(idx);
- if ( vif->shadow_ring->tx_cons == vif->net_ring->tx_event ) send = 1;
- } while ( tx->status != RING_STATUS_OK );
-
- /* Now skip over any more bad descriptors, up to the next good one. */
- do {
- idx = vif->shadow_ring->tx_cons;
- tx = &vif->shadow_ring->tx_ring[idx];
- /* Carry on until we find a good descriptor, or reach scheduler idx. */
- if ( (idx == vif->shadow_ring->tx_idx) ||
- (tx->status == RING_STATUS_OK) )
- break;
- vif->shadow_ring->tx_cons = TX_RING_INC(idx);
- if ( vif->shadow_ring->tx_cons == vif->net_ring->tx_event ) send = 1;
- } while ( 1 );
-
- /* Update shared consumer index to the new private value. */
- vif->net_ring->tx_cons = vif->shadow_ring->tx_cons;
-
- /* Send a transmit event if requested. */
- if ( send )
- {
- cpu_mask = mark_guest_event(vif->domain, _EVENT_NET_TX);
- guest_event_notify(cpu_mask);
- }
+ vif->shadow_ring->tx_transmitted_prod =
+ TX_RING_INC(vif->shadow_ring->tx_transmitted_prod);
+ cpu_mask = mark_hyp_event(vif->domain, _HYP_EVENT_NET);
+ hyp_event_notify(cpu_mask);
put_vif(vif);
}
@@ -765,27 +768,22 @@ static void net_tx_action(unsigned long unused)
continue;
}
- /* Pick an entry from the transmit queue. */
- tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
- vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
- if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
- add_to_net_schedule_list_tail(vif);
-
- /* Check the chosen entry is good. */
- if ( tx->status != RING_STATUS_OK )
- {
- put_vif(vif);
- continue;
- }
-
if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
{
printk("Out of memory in net_tx_action()!\n");
- tx->status = RING_STATUS_BAD_PAGE;
+ add_to_net_schedule_list_tail(vif);
put_vif(vif);
break;
}
+ /* Pick an entry from the transmit queue. */
+ tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
+ vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
+ if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
+ add_to_net_schedule_list_tail(vif);
+
+ ASSERT(tx->status == RING_STATUS_OK);
+
skb->destructor = tx_skb_release;
skb->head = skb->data = tx->header;
@@ -828,57 +826,6 @@ static inline void maybe_schedule_tx_action(void)
/*
- * update_shared_ring(void)
- *
- * This replaces flush_rx_queue as the guest event handler to move packets
- * queued in the guest ring up to the guest. Really, the packet is already
- * there, it was page flipped in deliver_packet, but this moves the ring
- * descriptor across from the shadow ring and increments the pointers.
- */
-
-void update_shared_ring(void)
-{
- rx_shadow_entry_t *rx;
- shared_info_t *s = current->shared_info;
- net_ring_t *net_ring;
- net_shadow_ring_t *shadow_ring;
- net_vif_t *vif;
- struct list_head *ent;
-
- clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
-
- list_for_each(ent, &current->net_vifs)
- {
- vif = list_entry(ent, net_vif_t, dom_list);
- net_ring = vif->net_ring;
- shadow_ring = vif->shadow_ring;
-
- /* This would mean that the guest OS has fiddled with our index. */
- if ( shadow_ring->rx_cons != net_ring->rx_cons )
- DPRINTK("Shadow and shared rings out of sync (%d/%d)\n",
- shadow_ring->rx_cons, net_ring->rx_cons);
-
- while ( shadow_ring->rx_cons != shadow_ring->rx_idx )
- {
- rx = shadow_ring->rx_ring + shadow_ring->rx_cons;
- copy_to_user(net_ring->rx_ring + shadow_ring->rx_cons, rx,
- sizeof(rx_entry_t));
-
- if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
- __flush_tlb();
-
- smp_wmb(); /* copy descriptor before inc'ing rx_cons */
- shadow_ring->rx_cons = RX_RING_INC(shadow_ring->rx_cons);
-
- if ( shadow_ring->rx_cons == net_ring->rx_event )
- set_bit(_EVENT_NET_RX, &s->events);
- }
- net_ring->rx_cons = shadow_ring->rx_cons;
- }
-}
-
-
-/*
* We need this ioctl for efficient implementation of the
* if_indextoname() function required by the IPv6 API. Without
* it, we would have to search all the interfaces to find a
@@ -1847,10 +1794,10 @@ long do_net_update(void)
net_ring_t *net_ring;
net_shadow_ring_t *shadow_ring;
net_vif_t *current_vif;
- unsigned int i;
+ unsigned int i, j;
struct sk_buff *skb;
- tx_entry_t tx;
- rx_shadow_entry_t *rx;
+ tx_req_entry_t tx;
+ rx_req_entry_t rx;
unsigned long pfn;
struct pfn_info *page;
unsigned long *g_pte;
@@ -1873,31 +1820,32 @@ long do_net_update(void)
* new producer index, but take care not to catch up with our own
* consumer index.
*/
- for ( i = shadow_ring->tx_prod;
- (i != net_ring->tx_prod) &&
- (((shadow_ring->tx_cons-i) & (TX_RING_SIZE-1)) != 1);
+ j = shadow_ring->tx_prod;
+ for ( i = shadow_ring->tx_req_cons;
+ (i != net_ring->tx_req_prod) &&
+ (((shadow_ring->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
i = TX_RING_INC(i) )
{
- if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
+ if ( copy_from_user(&tx, &net_ring->tx_ring[i].req, sizeof(tx)) )
{
DPRINTK("Bad copy_from_user for tx net descriptor\n");
- shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU;
+ make_tx_response(current_vif, tx.id, RING_STATUS_ERR_CFU);
continue;
}
- shadow_ring->tx_ring[i].size = tx.size;
- shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE;
-
- if ( tx.size < PKT_PROT_LEN )
+ if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
{
- DPRINTK("Runt packet %d\n", tx.size);
+ DPRINTK("Bad packet size: %d\n", tx.size);
+ make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
continue;
}
+        /* The payload must not cross a page boundary, as it mustn't fragment. */
if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
{
DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+ make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
continue;
}
@@ -1909,6 +1857,7 @@ long do_net_update(void)
{
DPRINTK("Bad page frame\n");
spin_unlock_irq(&current->page_lock);
+ make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
continue;
}
@@ -1917,45 +1866,61 @@ long do_net_update(void)
protocol = __constant_htons(
init_tx_header(g_data, tx.size, the_dev));
if ( protocol == 0 )
+ {
+ make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
goto tx_unmap_and_continue;
+ }
target = __net_get_target_vif(g_data, tx.size, current_vif->id);
if ( target > VIF_PHYSICAL_INTERFACE )
{
/* Local delivery */
- if ( (skb = dev_alloc_skb(tx.size)) == NULL )
+ if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
+ {
+ make_tx_response(current_vif, tx.id, RING_STATUS_BAD_PAGE);
goto tx_unmap_and_continue;
-
- skb->destructor = tx_skb_release;
- get_vif(current_vif);
-
- shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+ }
skb->src_vif = current_vif->id;
skb->dst_vif = target;
- skb->protocol = protocol;
-
+ skb->protocol = protocol;
+
+ /*
+ * We don't need a well-formed skb as netif_rx will fill these
+ * fields in as necessary. All we actually need is the right
+ * page offset in skb->data, and the right length in skb->len.
+ * Note that the correct address/length *excludes* link header.
+ */
skb->head = (u8 *)map_domain_mem(
((skb->pf - frame_table) << PAGE_SHIFT));
- skb->data = skb->head + 16;
- skb_reserve(skb,2);
+ skb->data = skb->head + 18;
memcpy(skb->data, g_data, tx.size);
- skb->len = tx.size;
- unmap_domain_mem(skb->head);
skb->data += ETH_HLEN;
+ skb->len = tx.size - ETH_HLEN;
+ unmap_domain_mem(skb->head);
+
(void)netif_rx(skb);
+
+ make_tx_response(current_vif, tx.id, RING_STATUS_OK);
}
else if ( target == VIF_PHYSICAL_INTERFACE )
{
- shadow_ring->tx_ring[i].header =
+ shadow_ring->tx_ring[j].id = tx.id;
+ shadow_ring->tx_ring[j].size = tx.size;
+ shadow_ring->tx_ring[j].status = RING_STATUS_OK;
+ shadow_ring->tx_ring[j].header =
kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
- if ( shadow_ring->tx_ring[i].header == NULL )
+ if ( shadow_ring->tx_ring[j].header == NULL )
+ {
+ make_tx_response(current_vif, tx.id, RING_STATUS_OK);
goto tx_unmap_and_continue;
- memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN);
- shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN;
- shadow_ring->tx_ring[i].status = RING_STATUS_OK;
+ }
+
+ memcpy(shadow_ring->tx_ring[j].header, g_data, PKT_PROT_LEN);
+ shadow_ring->tx_ring[j].payload = tx.addr + PKT_PROT_LEN;
get_page_tot(page);
+ j = TX_RING_INC(j);
}
tx_unmap_and_continue:
@@ -1963,10 +1928,12 @@ long do_net_update(void)
spin_unlock_irq(&current->page_lock);
}
- if ( shadow_ring->tx_prod != i )
+ shadow_ring->tx_req_cons = i;
+
+ if ( shadow_ring->tx_prod != j )
{
smp_mb(); /* Let other CPUs see new descriptors first. */
- shadow_ring->tx_prod = i;
+ shadow_ring->tx_prod = j;
add_to_net_schedule_list_tail(current_vif);
maybe_schedule_tx_action();
}
@@ -1980,29 +1947,23 @@ long do_net_update(void)
* new producer index, but take care not to catch up with our own
* consumer index.
*/
- for ( i = shadow_ring->rx_prod;
- (i != net_ring->rx_prod) &&
- (((shadow_ring->rx_cons-i) & (RX_RING_SIZE-1)) != 1);
+ j = shadow_ring->rx_prod;
+ for ( i = shadow_ring->rx_req_cons;
+ (i != net_ring->rx_req_prod) &&
+ (((shadow_ring->rx_resp_prod-i) & (RX_RING_SIZE-1)) != 1);
i = RX_RING_INC(i) )
{
- /*
- * This copy assumes that rx_shadow_entry_t is an extension of
- * rx_net_entry_t extra fields must be tacked on to the end.
- */
- if ( copy_from_user(shadow_ring->rx_ring+i, net_ring->rx_ring+i,
- sizeof (rx_entry_t) ) )
+ if ( copy_from_user(&rx, &net_ring->rx_ring[i].req, sizeof(rx)) )
{
- DPRINTK("Bad copy_from_user for rx ring\n");
- shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
+ DPRINTK("Bad copy_from_user for rx net descriptor\n");
+ make_rx_response(current_vif,
+ rx.id, 0, RING_STATUS_ERR_CFU, 0);
continue;
- }
+ }
- rx = shadow_ring->rx_ring + i;
- pfn = rx->addr >> PAGE_SHIFT;
+ pfn = rx.addr >> PAGE_SHIFT;
page = frame_table + pfn;
- shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
-
spin_lock_irq(&current->page_lock);
if ( (pfn >= max_page) ||
(page->flags != (PGT_l1_page_table | current->domain)) )
@@ -2010,14 +1971,18 @@ long do_net_update(void)
DPRINTK("Bad page frame for ppte %d,%08lx,%08lx,%08lx\n",
current->domain, pfn, max_page, page->flags);
spin_unlock_irq(&current->page_lock);
+ make_rx_response(current_vif,
+ rx.id, 0, RING_STATUS_BAD_PAGE, 0);
continue;
}
- g_pte = map_domain_mem(rx->addr);
+ g_pte = map_domain_mem(rx.addr);
if ( !(*g_pte & _PAGE_PRESENT) )
{
- DPRINTK("Inavlid PTE passed down (not present)\n");
+ DPRINTK("Invalid PTE passed down (not present)\n");
+ make_rx_response(current_vif,
+ rx.id, 0, RING_STATUS_BAD_PAGE, 0);
goto rx_unmap_and_continue;
}
@@ -2027,25 +1992,32 @@ long do_net_update(void)
{
DPRINTK("RX page mapped multple times (%d/%d/%08x)\n",
page->type_count, page->tot_count, page->flags);
-
+ make_rx_response(current_vif,
+ rx.id, 0, RING_STATUS_BAD_PAGE, 0);
goto rx_unmap_and_continue;
}
/* The pte they passed was good, so take it away from them. */
- shadow_ring->rx_ring[i].status = RING_STATUS_OK;
*g_pte &= ~_PAGE_PRESENT;
page->flags = (page->flags & ~PG_type_mask) | PGT_net_rx_buf;
- rx->flush_count = tlb_flush_count[smp_processor_id()];
+ shadow_ring->rx_ring[j].id = rx.id;
+ shadow_ring->rx_ring[j].addr = rx.addr;
+ shadow_ring->rx_ring[j].status = RING_STATUS_OK;
+ shadow_ring->rx_ring[j].flush_count =
+ tlb_flush_count[smp_processor_id()];
+ j = RX_RING_INC(j);
rx_unmap_and_continue:
unmap_domain_mem(g_pte);
spin_unlock_irq(&current->page_lock);
}
- if ( shadow_ring->rx_prod != i )
+ shadow_ring->rx_req_cons = i;
+
+ if ( shadow_ring->rx_prod != j )
{
smp_mb(); /* Let other CPUs see new descriptors first. */
- shadow_ring->rx_prod = i;
+ shadow_ring->rx_prod = j;
}
}
@@ -2053,6 +2025,58 @@ long do_net_update(void)
}
+static void make_tx_response(net_vif_t *vif,
+ unsigned long id,
+ unsigned char st)
+{
+ unsigned long flags;
+ net_shadow_ring_t *shadow = vif->shadow_ring;
+ unsigned int pos;
+ tx_resp_entry_t *resp, privresp;
+
+ /* Place on the response ring for the relevant domain. */
+ local_irq_save(flags);
+ pos = shadow->tx_resp_prod;
+ resp = &vif->net_ring->tx_ring[pos].resp;
+ privresp.id = id;
+ privresp.status = st;
+ copy_to_user(resp, &privresp, sizeof(privresp));
+ pos = TX_RING_INC(pos);
+ shadow->tx_resp_prod = vif->net_ring->tx_resp_prod = pos;
+    if ( pos == vif->net_ring->tx_event )
+ set_bit(_EVENT_NET_TX, &current->shared_info->events);
+ local_irq_restore(flags);
+}
+
+
+static void make_rx_response(net_vif_t *vif,
+ unsigned long id,
+ unsigned short size,
+ unsigned char st,
+ unsigned char off)
+{
+ unsigned long flags;
+ net_shadow_ring_t *shadow = vif->shadow_ring;
+ unsigned int pos;
+ rx_resp_entry_t *resp, privresp;
+
+ /* Place on the response ring for the relevant domain. */
+ local_irq_save(flags);
+ pos = shadow->rx_resp_prod;
+ resp = &vif->net_ring->rx_ring[pos].resp;
+ privresp.id = id;
+ privresp.size = size;
+ privresp.status = st;
+ privresp.offset = off;
+ copy_to_user(resp, &privresp, sizeof(privresp));
+ pos = RX_RING_INC(pos);
+ shadow->rx_resp_prod = vif->net_ring->rx_resp_prod = pos;
+ if ( pos == vif->net_ring->rx_event )
+ set_bit(_EVENT_NET_RX, &current->shared_info->events);
+ local_irq_restore(flags);
+}
+
+
int setup_network_devices(void)
{
int ret;
diff --git a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
index 01e81e0cf0..3ae5e3d1a0 100644
--- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
+++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/drivers/network/network.c
@@ -3,7 +3,7 @@
*
* Virtual network driver for XenoLinux.
*
- * Copyright (c) 2002, K A Fraser
+ * Copyright (c) 2002-2003, K A Fraser
*/
#include <linux/config.h>
@@ -47,21 +47,14 @@ static void cleanup_module(void);
static struct list_head dev_list;
-/*
- * RX RING: RX_IDX <= rx_cons <= rx_prod
- * TX RING: TX_IDX <= tx_cons <= tx_prod
- * (*_IDX allocated privately here, *_cons & *_prod shared with hypervisor)
- */
struct net_private
{
struct list_head list;
struct net_device *dev;
struct net_device_stats stats;
- struct sk_buff **tx_skb_ring;
- struct sk_buff **rx_skb_ring;
atomic_t tx_entries;
- unsigned int rx_idx, tx_idx, tx_full;
+ unsigned int rx_resp_cons, tx_resp_cons, tx_full;
net_ring_t *net_ring;
spinlock_t tx_lock;
};
@@ -71,10 +64,10 @@ static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
struct net_device *dev = (struct net_device *)dev_id;
struct net_private *np = dev->priv;
- printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_idx = %d,"
- " tx_cons = %d, tx_prod = %d, tx_event = %d, state=%d\n",
- np->tx_full, atomic_read(&np->tx_entries), np->tx_idx,
- np->net_ring->tx_cons, np->net_ring->tx_prod,
+ printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
+ " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
+ np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
+ np->net_ring->tx_req_prod, np->net_ring->tx_resp_prod,
np->net_ring->tx_event,
test_bit(__LINK_STATE_XOFF, &dev->state));
}
@@ -85,29 +78,17 @@ static int network_open(struct net_device *dev)
struct net_private *np = dev->priv;
int error = 0;
- np->rx_idx = np->tx_idx = np->tx_full = 0;
-
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
memset(&np->stats, 0, sizeof(np->stats));
-
spin_lock_init(&np->tx_lock);
-
atomic_set(&np->tx_entries, 0);
+ memset(np->net_ring, 0, sizeof(*np->net_ring));
- np->net_ring->tx_prod = np->net_ring->tx_cons = np->net_ring->tx_event = 0;
- np->net_ring->rx_prod = np->net_ring->rx_cons = np->net_ring->rx_event = 0;
- np->net_ring->tx_ring = NULL;
- np->net_ring->rx_ring = NULL;
-
- np->tx_skb_ring = kmalloc(TX_RING_SIZE * sizeof(struct sk_buff *),
- GFP_KERNEL);
- np->rx_skb_ring = kmalloc(RX_RING_SIZE * sizeof(struct sk_buff *),
- GFP_KERNEL);
np->net_ring->tx_ring = kmalloc(TX_RING_SIZE * sizeof(tx_entry_t),
GFP_KERNEL);
np->net_ring->rx_ring = kmalloc(RX_RING_SIZE * sizeof(rx_entry_t),
GFP_KERNEL);
- if ( (np->tx_skb_ring == NULL) || (np->rx_skb_ring == NULL) ||
- (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
+ if ( (np->net_ring->tx_ring == NULL) || (np->net_ring->rx_ring == NULL) )
{
printk(KERN_WARNING "%s; Could not allocate ring memory\n", dev->name);
error = -ENOBUFS;
@@ -156,8 +137,6 @@ static int network_open(struct net_device *dev)
fail:
if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring);
if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring);
- if ( np->rx_skb_ring ) kfree(np->rx_skb_ring);
- if ( np->tx_skb_ring ) kfree(np->tx_skb_ring);
kfree(np);
return error;
}
@@ -169,28 +148,29 @@ static void network_tx_buf_gc(struct net_device *dev)
struct net_private *np = dev->priv;
struct sk_buff *skb;
unsigned long flags;
- unsigned int cons;
+ unsigned int prod;
+ tx_entry_t *tx_ring = np->net_ring->tx_ring;
spin_lock_irqsave(&np->tx_lock, flags);
do {
- cons = np->net_ring->tx_cons;
+ prod = np->net_ring->tx_resp_prod;
- for ( i = np->tx_idx; i != cons; i = TX_RING_INC(i) )
+ for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
{
- skb = np->tx_skb_ring[i];
+ skb = (struct sk_buff *)tx_ring[i].resp.id;
dev_kfree_skb_any(skb);
atomic_dec(&np->tx_entries);
}
- np->tx_idx = i;
+ np->tx_resp_cons = prod;
/* Set a new event, then check for race with update of tx_cons. */
np->net_ring->tx_event =
- TX_RING_ADD(cons, (atomic_read(&np->tx_entries)>>1) + 1);
+ TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
smp_mb();
}
- while ( cons != np->net_ring->tx_cons );
+ while ( prod != np->net_ring->tx_resp_prod );
if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
{
@@ -201,21 +181,13 @@ static void network_tx_buf_gc(struct net_device *dev)
spin_unlock_irqrestore(&np->tx_lock, flags);
}
-inline unsigned long get_ppte(unsigned long addr)
+inline pte_t *get_ppte(void *addr)
{
- unsigned long ppte;
- pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
- pgd = pgd_offset_k(addr);
-
- if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
-
- pmd = pmd_offset(pgd, addr);
- if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
-
- ptep = pte_offset(pmd, addr);
- ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep));
-
- return ppte;
+ pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+ pgd = pgd_offset_k( (unsigned long)addr);
+ pmd = pmd_offset(pgd, (unsigned long)addr);
+ pte = pte_offset(pmd, (unsigned long)addr);
+ return pte;
}
static void network_alloc_rx_buffers(struct net_device *dev)
@@ -223,21 +195,21 @@ static void network_alloc_rx_buffers(struct net_device *dev)
unsigned int i;
struct net_private *np = dev->priv;
struct sk_buff *skb;
- unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES);
+ unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);
- for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) )
+ for ( i = np->net_ring->rx_req_prod; i != end; i = RX_RING_INC(i) )
{
skb = dev_alloc_skb(RX_BUF_SIZE);
if ( skb == NULL ) break;
skb->dev = dev;
- np->rx_skb_ring[i] = skb;
- np->net_ring->rx_ring[i].addr = get_ppte((unsigned long)skb->head);
- np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
+ np->net_ring->rx_ring[i].req.id = (unsigned long)skb;
+ np->net_ring->rx_ring[i].req.addr =
+ virt_to_machine(get_ppte(skb->head));
}
- np->net_ring->rx_prod = i;
+ np->net_ring->rx_req_prod = i;
- np->net_ring->rx_event = RX_RING_INC(np->rx_idx);
+ np->net_ring->rx_event = RX_RING_INC(np->rx_resp_cons);
/*
* We may have allocated buffers which have entries outstanding in
@@ -254,9 +226,11 @@ static void network_free_rx_buffers(struct net_device *dev)
struct net_private *np = dev->priv;
struct sk_buff *skb;
- for ( i = np->rx_idx; i != np->net_ring->rx_prod; i = RX_RING_INC(i) )
+ for ( i = np->rx_resp_cons;
+ i != np->net_ring->rx_req_prod;
+ i = RX_RING_INC(i) )
{
- skb = np->rx_skb_ring[i];
+ skb = (struct sk_buff *)np->net_ring->rx_ring[i].req.id;
dev_kfree_skb_any(skb);
}
}
@@ -272,7 +246,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif_stop_queue(dev);
return -ENOBUFS;
}
- i = np->net_ring->tx_prod;
+ i = np->net_ring->tx_req_prod;
if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
{
@@ -284,11 +258,11 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb = new_skb;
}
- np->tx_skb_ring[i] = skb;
- np->net_ring->tx_ring[i].addr =
- (unsigned long)phys_to_machine(virt_to_phys(skb->data));
- np->net_ring->tx_ring[i].size = skb->len;
- np->net_ring->tx_prod = TX_RING_INC(i);
+ np->net_ring->tx_ring[i].req.id = (unsigned long)skb;
+ np->net_ring->tx_ring[i].req.addr =
+ phys_to_machine(virt_to_phys(skb->data));
+ np->net_ring->tx_ring[i].req.size = skb->len;
+ np->net_ring->tx_req_prod = TX_RING_INC(i);
atomic_inc(&np->tx_entries);
np->stats.tx_bytes += skb->len;
@@ -316,13 +290,15 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
struct net_device *dev = (struct net_device *)dev_id;
struct net_private *np = dev->priv;
struct sk_buff *skb;
- rx_entry_t *rx;
+ rx_resp_entry_t *rx;
again:
- for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
+ for ( i = np->rx_resp_cons;
+ i != np->net_ring->rx_resp_prod;
+ i = RX_RING_INC(i) )
{
- rx = &np->net_ring->rx_ring[i];
- skb = np->rx_skb_ring[i];
+ rx = &np->net_ring->rx_ring[i].resp;
+ skb = (struct sk_buff *)rx->id;
if ( rx->status != RING_STATUS_OK )
{
@@ -341,8 +317,7 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
skb_shinfo(skb)->frag_list = NULL;
phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
- (*(unsigned long *)phys_to_virt(machine_to_phys(rx->addr))
- ) >> PAGE_SHIFT;
+ (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
if ( rx->offset < 16 )
{
@@ -353,23 +328,23 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs)
skb_reserve(skb, rx->offset - 16);
- skb_put(skb, np->net_ring->rx_ring[i].size);
+ skb_put(skb, rx->size);
skb->protocol = eth_type_trans(skb, dev);
np->stats.rx_packets++;
- np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
+ np->stats.rx_bytes += rx->size;
netif_rx(skb);
dev->last_rx = jiffies;
}
- np->rx_idx = i;
+ np->rx_resp_cons = i;
network_alloc_rx_buffers(dev);
/* Deal with hypervisor racing our resetting of rx_event. */
smp_mb();
- if ( np->net_ring->rx_cons != i ) goto again;
+ if ( np->net_ring->rx_resp_prod != i ) goto again;
}
@@ -382,8 +357,6 @@ static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs)
int network_close(struct net_device *dev)
{
- struct net_private *np = dev->priv;
-
netif_stop_queue(dev);
free_irq(NET_RX_IRQ, dev);
@@ -401,9 +374,6 @@ int network_close(struct net_device *dev)
kfree(np->net_ring->tx_ring);
#endif
- kfree(np->rx_skb_ring);
- kfree(np->tx_skb_ring);
-
MOD_DEC_USE_COUNT;
return 0;
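
Finally, the event re-check pattern in network_tx_buf_gc above deserves a
sketch: after the frontend publishes a new tx_event threshold, responses may
already have landed in the window before the write became visible, so the
loop re-reads tx_resp_prod rather than going to sleep. A single-threaded
simulation of that pattern (ring wrap, locking, and the real smp_mb()
elided; the mid-loop callback plays the hypervisor):

#include <stdio.h>

static volatile unsigned int tx_resp_prod;   /* written by "hypervisor" */
static unsigned int tx_resp_cons, tx_event;

static void hypervisor_produces(unsigned int n) { tx_resp_prod += n; }

static void tx_buf_gc(void)
{
    unsigned int prod;
    do {
        prod = tx_resp_prod;
        while ( tx_resp_cons != prod )
            tx_resp_cons++;              /* free one skb per response */
        tx_event = prod + 1;             /* ask for an event one slot on */
        /* smp_mb() here in the real driver */
        if ( prod == 3 )
            hypervisor_produces(2);      /* responses land in the window */
    } while ( prod != tx_resp_prod );    /* re-check closes the race */
}

int main(void)
{
    hypervisor_produces(3);
    tx_buf_gc();
    printf("consumed up to %u (event set at %u)\n", tx_resp_cons, tx_event);
    return 0;
}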