Diffstat (limited to 'linux-2.4.26-xen-sparse/arch/xen/drivers/netif')
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile             |  10
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile     |   3
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h     |  96
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c    |  65
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c  | 304
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c       | 772
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile    |   3
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c      | 777
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h              |  88
9 files changed, 2118 insertions, 0 deletions
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile
new file mode 100644
index 0000000000..20c8192d3d
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile
@@ -0,0 +1,10 @@
+
+O_TARGET := drv.o
+
+subdir-y += frontend
+obj-y += frontend/drv.o
+
+subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend
+obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o
+
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile
new file mode 100644
index 0000000000..9ffb0bd702
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o control.o interface.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h
new file mode 100644
index 0000000000..88881cdf66
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h
@@ -0,0 +1,96 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/common.h
+ */
+
+#ifndef __NETIF__BACKEND__COMMON_H__
+#define __NETIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <asm/ctrl_if.h>
+#include <asm/io.h>
+#include "../netif.h"
+#include "../../../../../net/bridge/br_private.h"
+
+#ifndef NDEBUG
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk("(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+typedef struct netif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+
+ /* Physical parameters of the comms window. */
+ unsigned long tx_shmem_frame;
+ unsigned long rx_shmem_frame;
+ unsigned int evtchn;
+ int irq;
+
+ /* The shared rings and indexes. */
+ netif_tx_interface_t *tx;
+ netif_rx_interface_t *rx;
+
+ /* Private indexes into shared ring. */
+ NETIF_RING_IDX rx_req_cons;
+ NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */
+ NETIF_RING_IDX tx_req_cons;
+ NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */
+
+ /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+ unsigned long credit_bytes;
+ unsigned long credit_usec;
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
+
+ /* Miscellaneous private stuff. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */
+ u8 disconnect_rspid;
+ struct netif_st *hash_next;
+ struct list_head list; /* scheduling list */
+ atomic_t refcnt;
+ spinlock_t rx_lock, tx_lock;
+ struct net_device *dev;
+ struct net_device_stats stats;
+} netif_t;
+
+void netif_create(netif_be_create_t *create);
+void netif_destroy(netif_be_destroy_t *destroy);
+void netif_connect(netif_be_connect_t *connect);
+int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id);
+void __netif_disconnect_complete(netif_t *netif);
+netif_t *netif_find_by_handle(domid_t domid, unsigned int handle);
+#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define netif_put(_b) \
+ do { \
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
+ __netif_disconnect_complete(_b); \
+ } while (0)
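+/*
+ * Dropping the last reference completes a pending disconnect: netif_put()
+ * invokes __netif_disconnect_complete() once the count reaches zero.
+ */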
+
+void netif_interface_init(void);
+void netif_ctrlif_init(void);
+
+void netif_deschedule(netif_t *netif);
+
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+void netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+#endif /* __NETIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c
new file mode 100644
index 0000000000..cf1b075031
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c
@@ -0,0 +1,65 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/control.c
+ *
+ * Routines for interfacing with the control plane.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+
+static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_NETIF_BE_CREATE:
+ if ( msg->length != sizeof(netif_be_create_t) )
+ goto parse_error;
+ netif_create((netif_be_create_t *)&msg->msg[0]);
+ break;
+ case CMSG_NETIF_BE_DESTROY:
+ if ( msg->length != sizeof(netif_be_destroy_t) )
+ goto parse_error;
+ netif_destroy((netif_be_destroy_t *)&msg->msg[0]);
+ break;
+ case CMSG_NETIF_BE_CONNECT:
+ if ( msg->length != sizeof(netif_be_connect_t) )
+ goto parse_error;
+ netif_connect((netif_be_connect_t *)&msg->msg[0]);
+ break;
+ case CMSG_NETIF_BE_DISCONNECT:
+ if ( msg->length != sizeof(netif_be_disconnect_t) )
+ goto parse_error;
+ if ( !netif_disconnect((netif_be_disconnect_t *)&msg->msg[0],msg->id) )
+ return; /* Sending the response is deferred until later. */
+ break;
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ DPRINTK("Parse error while reading message subtype %d, len %d\n",
+ msg->subtype, msg->length);
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+void netif_ctrlif_init(void)
+{
+ ctrl_msg_t cmsg;
+ netif_be_driver_status_changed_t st;
+
+ (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_NETIF_BE;
+ cmsg.subtype = CMSG_NETIF_BE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(netif_be_driver_status_changed_t);
+ st.status = NETIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c
new file mode 100644
index 0000000000..5a2da3d29b
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c
@@ -0,0 +1,304 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/interface.c
+ *
+ * Network-device interface management.
+ *
+ * Copyright (c) 2004, Keir Fraser
+ */
+
+#include "common.h"
+#include <linux/rtnetlink.h>
+
+#define NETIF_HASHSZ 1024
+#define NETIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(NETIF_HASHSZ-1))
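+/* XOR of domid and handle selects one of 1024 buckets; collisions chain via 'hash_next'. */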
+
+static netif_t *netif_hash[NETIF_HASHSZ];
+static struct net_device *bridge_dev;
+static struct net_bridge *bridge_br;
+
+netif_t *netif_find_by_handle(domid_t domid, unsigned int handle)
+{
+ netif_t *netif = netif_hash[NETIF_HASH(domid, handle)];
+ while ( (netif != NULL) &&
+ ((netif->domid != domid) || (netif->handle != handle)) )
+ netif = netif->hash_next;
+ return netif;
+}
+
+void __netif_disconnect_complete(netif_t *netif)
+{
+ ctrl_msg_t cmsg;
+ netif_be_disconnect_t disc;
+
+ /*
+ * These can't be done in netif_disconnect() because at that point there
+ * may be outstanding requests in flight whose asynchronous responses
+ * must still be delivered to the remote driver.
+ */
+ unbind_evtchn_from_irq(netif->evtchn);
+ vfree(netif->tx); /* Frees netif->rx as well. */
+ rtnl_lock();
+ (void)br_del_if(bridge_br, netif->dev);
+ (void)dev_close(netif->dev);
+ rtnl_unlock();
+
+ /* Construct the deferred response message. */
+ cmsg.type = CMSG_NETIF_BE;
+ cmsg.subtype = CMSG_NETIF_BE_DISCONNECT;
+ cmsg.id = netif->disconnect_rspid;
+ cmsg.length = sizeof(netif_be_disconnect_t);
+ disc.domid = netif->domid;
+ disc.netif_handle = netif->handle;
+ disc.status = NETIF_BE_STATUS_OKAY;
+ memcpy(cmsg.msg, &disc, sizeof(disc));
+
+ /*
+ * Make sure message is constructed /before/ status change, because
+ * after the status change the 'netif' structure could be deallocated at
+ * any time. Also make sure we send the response /after/ status change,
+ * as otherwise a subsequent CONNECT request could spuriously fail if
+ * another CPU doesn't see the status change yet.
+ */
+ mb();
+ if ( netif->status != DISCONNECTING )
+ BUG();
+ netif->status = DISCONNECTED;
+ mb();
+
+ /* Send the successful response. */
+ ctrl_if_send_response(&cmsg);
+}
+
+void netif_create(netif_be_create_t *create)
+{
+ domid_t domid = create->domid;
+ unsigned int handle = create->netif_handle;
+ struct net_device *dev;
+ netif_t **pnetif, *netif;
+ char name[IFNAMSIZ];
+
+ snprintf(name, IFNAMSIZ, "vif%u.%u", domid, handle);
+ dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
+ if ( dev == NULL )
+ {
+ DPRINTK("Could not create netif: out of memory\n");
+ create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ netif = dev->priv;
+ memset(netif, 0, sizeof(*netif));
+ netif->domid = domid;
+ netif->handle = handle;
+ netif->status = DISCONNECTED;
+ spin_lock_init(&netif->rx_lock);
+ spin_lock_init(&netif->tx_lock);
+ atomic_set(&netif->refcnt, 0);
+ netif->dev = dev;
+
+ netif->credit_bytes = netif->remaining_credit = ~0UL;
+ netif->credit_usec = 0UL;
+ /*init_ac_timer(&new_vif->credit_timeout);*/
+
+ pnetif = &netif_hash[NETIF_HASH(domid, handle)];
+ while ( *pnetif != NULL )
+ {
+ if ( ((*pnetif)->domid == domid) && ((*pnetif)->handle == handle) )
+ {
+ DPRINTK("Could not create netif: already exists\n");
+ create->status = NETIF_BE_STATUS_INTERFACE_EXISTS;
+ kfree(dev);
+ return;
+ }
+ pnetif = &(*pnetif)->hash_next;
+ }
+
+ dev->hard_start_xmit = netif_be_start_xmit;
+ dev->get_stats = netif_be_get_stats;
+ memcpy(dev->dev_addr, create->mac, ETH_ALEN);
+
+ /* Disable queuing. */
+ dev->tx_queue_len = 0;
+
+ /* XXX In bridge mode we should force a different MAC from remote end. */
+ dev->dev_addr[2] ^= 1;
+
+ if ( register_netdev(dev) != 0 )
+ {
+ DPRINTK("Could not register new net device\n");
+ create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
+ kfree(dev);
+ return;
+ }
+
+ netif->hash_next = *pnetif;
+ *pnetif = netif;
+
+ DPRINTK("Successfully created netif\n");
+ create->status = NETIF_BE_STATUS_OKAY;
+}
+
+void netif_destroy(netif_be_destroy_t *destroy)
+{
+ domid_t domid = destroy->domid;
+ unsigned int handle = destroy->netif_handle;
+ netif_t **pnetif, *netif;
+
+ pnetif = &netif_hash[NETIF_HASH(domid, handle)];
+ while ( (netif = *pnetif) != NULL )
+ {
+ if ( (netif->domid == domid) && (netif->handle == handle) )
+ {
+ if ( netif->status != DISCONNECTED )
+ goto still_connected;
+ goto destroy;
+ }
+ pnetif = &netif->hash_next;
+ }
+
+ destroy->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+
+ still_connected:
+ destroy->status = NETIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+
+ destroy:
+ *pnetif = netif->hash_next;
+ unregister_netdev(netif->dev);
+ kfree(netif->dev);
+ destroy->status = NETIF_BE_STATUS_OKAY;
+}
+
+void netif_connect(netif_be_connect_t *connect)
+{
+ domid_t domid = connect->domid;
+ unsigned int handle = connect->netif_handle;
+ unsigned int evtchn = connect->evtchn;
+ unsigned long tx_shmem_frame = connect->tx_shmem_frame;
+ unsigned long rx_shmem_frame = connect->rx_shmem_frame;
+ struct vm_struct *vma;
+ pgprot_t prot;
+ int error;
+ netif_t *netif;
+ struct net_device *eth0_dev;
+
+ netif = netif_find_by_handle(domid, handle);
+ if ( unlikely(netif == NULL) )
+ {
+ DPRINTK("netif_connect attempted for non-existent netif (%u,%u)\n",
+ connect->domid, connect->netif_handle);
+ connect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return;
+ }
+
+ if ( netif->status != DISCONNECTED )
+ {
+ connect->status = NETIF_BE_STATUS_INTERFACE_CONNECTED;
+ return;
+ }
+
+ if ( (vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP)) == NULL )
+ {
+ connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
+ return;
+ }
+
+ prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+ error = direct_remap_area_pages(&init_mm,
+ VMALLOC_VMADDR(vma->addr),
+ tx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ error |= direct_remap_area_pages(&init_mm,
+ VMALLOC_VMADDR(vma->addr) + PAGE_SIZE,
+ rx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
+ prot, domid);
+ if ( error != 0 )
+ {
+ if ( error == -ENOMEM )
+ connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
+ else if ( error == -EFAULT )
+ connect->status = NETIF_BE_STATUS_MAPPING_ERROR;
+ else
+ connect->status = NETIF_BE_STATUS_ERROR;
+ vfree(vma->addr);
+ return;
+ }
+
+ netif->evtchn = evtchn;
+ netif->irq = bind_evtchn_to_irq(evtchn);
+ netif->tx_shmem_frame = tx_shmem_frame;
+ netif->rx_shmem_frame = rx_shmem_frame;
+ netif->tx =
+ (netif_tx_interface_t *)vma->addr;
+ netif->rx =
+ (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE);
+ netif->status = CONNECTED;
+ netif_get(netif);
+
+ rtnl_lock();
+
+ (void)dev_open(netif->dev);
+ (void)br_add_if(bridge_br, netif->dev);
+
+ /*
+ * The default config is a very simple binding to eth0.
+ * If eth0 is being used as an IP interface by this OS then someone
+ * must add eth0's IP address to nbe-br, and change the routing table
+ * to refer to nbe-br instead of eth0.
+ */
+ (void)dev_open(bridge_dev);
+ if ( (eth0_dev = __dev_get_by_name("eth0")) != NULL )
+ {
+ (void)dev_open(eth0_dev);
+ (void)br_add_if(bridge_br, eth0_dev);
+ }
+
+ rtnl_unlock();
+
+ (void)request_irq(netif->irq, netif_be_int, 0, netif->dev->name, netif);
+ netif_start_queue(netif->dev);
+
+ connect->status = NETIF_BE_STATUS_OKAY;
+}
+
+int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id)
+{
+ domid_t domid = disconnect->domid;
+ unsigned int handle = disconnect->netif_handle;
+ netif_t *netif;
+
+ netif = netif_find_by_handle(domid, handle);
+ if ( unlikely(netif == NULL) )
+ {
+ DPRINTK("netif_disconnect attempted for non-existent netif"
+ " (%u,%u)\n", disconnect->domid, disconnect->netif_handle);
+ disconnect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND;
+ return 1; /* Caller will send response error message. */
+ }
+
+ if ( netif->status == CONNECTED )
+ {
+ netif->status = DISCONNECTING;
+ netif->disconnect_rspid = rsp_id;
+ wmb(); /* Let other CPUs see the status change. */
+ netif_stop_queue(netif->dev);
+ free_irq(netif->irq, NULL);
+ netif_deschedule(netif);
+ netif_put(netif);
+ }
+
+ return 0; /* Caller should not send response message. */
+}
+
+void netif_interface_init(void)
+{
+ memset(netif_hash, 0, sizeof(netif_hash));
+ if ( br_add_bridge("nbe-br") != 0 )
+ BUG();
+ bridge_dev = __dev_get_by_name("nbe-br");
+ bridge_br = (struct net_bridge *)bridge_dev->priv;
+ bridge_br->bridge_hello_time = bridge_br->hello_time = 0;
+ bridge_br->bridge_forward_delay = bridge_br->forward_delay = 0;
+ bridge_br->stp_enabled = 0;
+}
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c
new file mode 100644
index 0000000000..2c4e5cb211
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c
@@ -0,0 +1,772 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/main.c
+ *
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A
+ * reference front-end implementation can be found in:
+ * arch/xen/drivers/netif/frontend
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+
+#include "common.h"
+
+static void netif_page_release(struct page *page);
+static void make_tx_response(netif_t *netif,
+ u16 id,
+ s8 st);
+static int make_rx_response(netif_t *netif,
+ u16 id,
+ s8 st,
+ memory_t addr,
+ u16 size);
+
+static void net_tx_action(unsigned long unused);
+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
+
+static void net_rx_action(unsigned long unused);
+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
+
+typedef struct {
+ u16 id;
+ unsigned long old_mach_ptr;
+ unsigned long new_mach_pfn;
+ netif_t *netif;
+} rx_info_t;
+static struct sk_buff_head rx_queue;
+static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
+static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
+static unsigned char rx_notify[NR_EVENT_CHANNELS];
+
+/* Don't currently gate addition of an interface to the tx scheduling list. */
+#define tx_work_exists(_if) (1)
+
+#define MAX_PENDING_REQS 256
+static unsigned long mmap_vstart;
+#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
+
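+/*
+ * Number of bytes copied out of the mapped transmit page into the local skb:
+ * the Ethernet header plus 20 bytes, enough to cover a basic IPv4 header for
+ * protocol inspection. The remainder of the packet is attached to the skb as
+ * a page fragment in net_tx_action().
+ */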
+#define PKT_PROT_LEN (ETH_HLEN + 20)
+
+static u16 pending_id[MAX_PENDING_REQS];
+static netif_t *pending_netif[MAX_PENDING_REQS];
+static u16 pending_ring[MAX_PENDING_REQS];
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
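+/*
+ * pending_ring[] is a free list of request slots: net_tx_action() takes a
+ * slot at pending_cons and completed slots are returned at pending_prod.
+ * The list starts full (pending_prod == MAX_PENDING_REQS, pending_cons == 0),
+ * so NR_PENDING_REQS is the number of slots currently in use.
+ */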
+
+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
+static u16 dealloc_ring[MAX_PENDING_REQS];
+static spinlock_t dealloc_lock = SPIN_LOCK_UNLOCKED;
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
+
+typedef struct {
+ u16 idx;
+ netif_tx_request_t req;
+ netif_t *netif;
+} tx_info_t;
+static struct sk_buff_head tx_queue;
+static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
+
+static struct list_head net_schedule_list;
+static spinlock_t net_schedule_list_lock;
+
+#define MAX_MFN_ALLOC 64
+static unsigned long mfn_list[MAX_MFN_ALLOC];
+static unsigned int alloc_index = 0;
+static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
+
+static void __refresh_mfn_list(void)
+{
+ int ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
+ mfn_list, MAX_MFN_ALLOC);
+ if ( unlikely(ret != MAX_MFN_ALLOC) )
+ {
+ printk(KERN_ALERT "Unable to increase memory reservation (%d)\n", ret);
+ BUG();
+ }
+ alloc_index = MAX_MFN_ALLOC;
+}
+
+static unsigned long get_new_mfn(void)
+{
+ unsigned long mfn, flags;
+ spin_lock_irqsave(&mfn_lock, flags);
+ if ( alloc_index == 0 )
+ __refresh_mfn_list();
+ mfn = mfn_list[--alloc_index];
+ spin_unlock_irqrestore(&mfn_lock, flags);
+ return mfn;
+}
+
+static void dealloc_mfn(unsigned long mfn)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&mfn_lock, flags);
+ if ( alloc_index != MAX_MFN_ALLOC )
+ mfn_list[alloc_index++] = mfn;
+ else
+ (void)HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, &mfn, 1);
+ spin_unlock_irqrestore(&mfn_lock, flags);
+}
+
+static inline void maybe_schedule_tx_action(void)
+{
+ smp_mb();
+ if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
+ !list_empty(&net_schedule_list) )
+ tasklet_schedule(&net_tx_tasklet);
+}
+
+/*
+ * This is the primary RECEIVE function for a network interface.
+ * Note that, from the p.o.v. of /this/ OS it looks like a transmit.
+ */
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ netif_t *netif = (netif_t *)dev->priv;
+
+ /* Drop the packet if the target domain has no receive buffers. */
+ if ( (netif->rx_req_cons == netif->rx->req_prod) ||
+ ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
+ goto drop;
+
+ /*
+ * We do not copy the packet unless:
+ * 1. The data is shared; or
+ * 2. It spans a page boundary; or
+ * 3. We cannot be sure the whole data page is allocated.
+ * The copying method is taken from skb_copy().
+ * NB. We also could not cope with fragmented packets, but we won't get
+ * any because we do not advertise the NETIF_F_SG feature.
+ */
+ if ( skb_shared(skb) || skb_cloned(skb) ||
+ (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) ||
+ ((skb->end - skb->head) < (PAGE_SIZE/2)) )
+ {
+ struct sk_buff *nskb = alloc_skb(PAGE_SIZE-1024, GFP_ATOMIC);
+ int hlen = skb->data - skb->head;
+ if ( unlikely(nskb == NULL) )
+ goto drop;
+ skb_reserve(nskb, hlen);
+ __skb_put(nskb, skb->len);
+ (void)skb_copy_bits(skb, -hlen, nskb->head, hlen + skb->len);
+ dev_kfree_skb(skb);
+ skb = nskb;
+ }
+
+ ((rx_info_t *)&skb->cb[0])->id =
+ netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_req_cons++)].req.id;
+ ((rx_info_t *)&skb->cb[0])->netif = netif;
+
+ __skb_queue_tail(&rx_queue, skb);
+ tasklet_schedule(&net_rx_tasklet);
+
+ return 0;
+
+ drop:
+ netif->stats.rx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+#if 0
+static void xen_network_done_notify(void)
+{
+ static struct net_device *eth0_dev = NULL;
+ if ( unlikely(eth0_dev == NULL) )
+ eth0_dev = __dev_get_by_name("eth0");
+ netif_rx_schedule(eth0_dev);
+}
+/*
+ * Add following to poll() function in NAPI driver (Tigon3 is example):
+ * if ( xen_network_done() )
+ * tg3_enable_ints(tp);
+ */
+int xen_network_done(void)
+{
+ return skb_queue_empty(&rx_queue);
+}
+#endif
+
+static void net_rx_action(unsigned long unused)
+{
+ netif_t *netif;
+ s8 status;
+ u16 size, id, evtchn;
+ mmu_update_t *mmu = rx_mmu;
+ multicall_entry_t *mcl;
+ unsigned long vdata, mdata, new_mfn;
+ struct sk_buff_head rxq;
+ struct sk_buff *skb;
+ u16 notify_list[NETIF_RX_RING_SIZE];
+ int notify_nr = 0;
+
+ skb_queue_head_init(&rxq);
+
+ mcl = rx_mcl;
+ while ( (skb = __skb_dequeue(&rx_queue)) != NULL )
+ {
+ netif = ((rx_info_t *)&skb->cb[0])->netif;
+ vdata = (unsigned long)skb->data;
+ mdata = virt_to_machine(vdata);
+ new_mfn = get_new_mfn();
+
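+ /*
+ * Three MMU updates per packet: record the M2P entry for the replacement
+ * frame, name the receiving domain as the subject, and reassign the
+ * machine page holding the packet data to that domain. The accompanying
+ * multicall entry remaps our virtual address onto the replacement frame.
+ */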
+ mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+ mmu[0].val = __pa(vdata) >> PAGE_SHIFT;
+ mmu[1].val = (unsigned long)(netif->domid<<16) & ~0xFFFFUL;
+ mmu[1].ptr = (unsigned long)(netif->domid<< 0) & ~0xFFFFUL;
+ mmu[1].ptr |= MMU_EXTENDED_COMMAND;
+ mmu[1].val |= MMUEXT_SET_SUBJECTDOM;
+ mmu[2].ptr = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND;
+ mmu[2].val = MMUEXT_REASSIGN_PAGE;
+
+ mcl[0].op = __HYPERVISOR_update_va_mapping;
+ mcl[0].args[0] = vdata >> PAGE_SHIFT;
+ mcl[0].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL;
+ mcl[0].args[2] = 0;
+ mcl[1].op = __HYPERVISOR_mmu_update;
+ mcl[1].args[0] = (unsigned long)mmu;
+ mcl[1].args[1] = 3;
+ mcl[1].args[2] = 0;
+
+ mmu += 3;
+ mcl += 2;
+
+ ((rx_info_t *)&skb->cb[0])->old_mach_ptr = mdata;
+ ((rx_info_t *)&skb->cb[0])->new_mach_pfn = new_mfn;
+ __skb_queue_tail(&rxq, skb);
+
+ /* Filled the batch queue? */
+ if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
+ break;
+ }
+
+ if ( mcl == rx_mcl )
+ return;
+
+ mcl[-2].args[2] = UVMF_FLUSH_TLB;
+ (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+
+ mcl = rx_mcl;
+ while ( (skb = __skb_dequeue(&rxq)) != NULL )
+ {
+ netif = ((rx_info_t *)&skb->cb[0])->netif;
+ size = skb->tail - skb->data;
+ id = ((rx_info_t *)&skb->cb[0])->id;
+ new_mfn = ((rx_info_t *)&skb->cb[0])->new_mach_pfn;
+ mdata = ((rx_info_t *)&skb->cb[0])->old_mach_ptr;
+
+ /* Check the reassignment error code. */
+ if ( unlikely(mcl[1].args[5] != 0) )
+ {
+ DPRINTK("Failed MMU update transferring to DOM%u\n",
+ netif->domid);
+ (void)HYPERVISOR_update_va_mapping(
+ (unsigned long)skb->head >> PAGE_SHIFT,
+ (pte_t) { (mdata & PAGE_MASK) | __PAGE_KERNEL },
+ UVMF_INVLPG);
+ dealloc_mfn(new_mfn);
+ status = NETIF_RSP_ERROR;
+ }
+ else
+ {
+ phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;
+
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ netif->stats.rx_bytes += size;
+ netif->stats.rx_packets++;
+
+ status = NETIF_RSP_OKAY;
+ }
+
+ evtchn = netif->evtchn;
+ if ( make_rx_response(netif, id, status, mdata, size) &&
+ (rx_notify[evtchn] == 0) )
+ {
+ rx_notify[evtchn] = 1;
+ notify_list[notify_nr++] = evtchn;
+ }
+
+ dev_kfree_skb(skb);
+
+ mcl += 2;
+ }
+
+ while ( notify_nr != 0 )
+ {
+ evtchn = notify_list[--notify_nr];
+ rx_notify[evtchn] = 0;
+ notify_via_evtchn(evtchn);
+ }
+
+ /* More work to do? */
+ if ( !skb_queue_empty(&rx_queue) )
+ tasklet_schedule(&net_rx_tasklet);
+#if 0
+ else
+ xen_network_done_notify();
+#endif
+}
+
+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+{
+ netif_t *netif = dev->priv;
+ return &netif->stats;
+}
+
+static int __on_net_schedule_list(netif_t *netif)
+{
+ return netif->list.next != NULL;
+}
+
+static void remove_from_net_schedule_list(netif_t *netif)
+{
+ spin_lock_irq(&net_schedule_list_lock);
+ if ( likely(__on_net_schedule_list(netif)) )
+ {
+ list_del(&netif->list);
+ netif->list.next = NULL;
+ netif_put(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
+}
+
+static void add_to_net_schedule_list_tail(netif_t *netif)
+{
+ if ( __on_net_schedule_list(netif) )
+ return;
+
+ spin_lock_irq(&net_schedule_list_lock);
+ if ( !__on_net_schedule_list(netif) && (netif->status == CONNECTED) )
+ {
+ list_add_tail(&netif->list, &net_schedule_list);
+ netif_get(netif);
+ }
+ spin_unlock_irq(&net_schedule_list_lock);
+}
+
+static inline void netif_schedule_work(netif_t *netif)
+{
+ if ( (netif->tx_req_cons != netif->tx->req_prod) &&
+ ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
+ {
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+ }
+}
+
+void netif_deschedule(netif_t *netif)
+{
+ remove_from_net_schedule_list(netif);
+}
+
+#if 0
+static void tx_credit_callback(unsigned long data)
+{
+ netif_t *netif = (netif_t *)data;
+ netif->remaining_credit = netif->credit_bytes;
+ netif_schedule_work(netif);
+}
+#endif
+
+static void net_tx_action(unsigned long unused)
+{
+ struct list_head *ent;
+ struct sk_buff *skb;
+ netif_t *netif;
+ netif_tx_request_t txreq;
+ u16 pending_idx;
+ NETIF_RING_IDX i;
+ struct page *page;
+ multicall_entry_t *mcl;
+
+ if ( (i = dealloc_cons) == dealloc_prod )
+ goto skip_dealloc;
+
+ mcl = tx_mcl;
+ while ( i != dealloc_prod )
+ {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(i++)];
+ mcl[0].op = __HYPERVISOR_update_va_mapping;
+ mcl[0].args[0] = MMAP_VADDR(pending_idx) >> PAGE_SHIFT;
+ mcl[0].args[1] = 0;
+ mcl[0].args[2] = 0;
+ mcl++;
+ }
+
+ mcl[-1].args[2] = UVMF_FLUSH_TLB;
+ (void)HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl);
+
+ while ( dealloc_cons != dealloc_prod )
+ {
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+ netif = pending_netif[pending_idx];
+
+ spin_lock(&netif->tx_lock);
+ make_tx_response(netif, pending_id[pending_idx], NETIF_RSP_OKAY);
+ spin_unlock(&netif->tx_lock);
+
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+ /*
+ * Scheduling checks must happen after the above response is posted.
+ * This avoids a possible race with a guest OS on another CPU.
+ */
+ mb();
+ if ( (netif->tx_req_cons != netif->tx->req_prod) &&
+ ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
+ add_to_net_schedule_list_tail(netif);
+
+ netif_put(netif);
+ }
+
+ skip_dealloc:
+ mcl = tx_mcl;
+ while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
+ !list_empty(&net_schedule_list) )
+ {
+ /* Get a netif from the list with work to do. */
+ ent = net_schedule_list.next;
+ netif = list_entry(ent, netif_t, list);
+ netif_get(netif);
+ remove_from_net_schedule_list(netif);
+
+ /* Work to do? */
+ i = netif->tx_req_cons;
+ if ( (i == netif->tx->req_prod) ||
+ ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
+ {
+ netif_put(netif);
+ continue;
+ }
+ memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
+ sizeof(txreq));
+ netif->tx_req_cons++;
+
+#if 0
+ /* Credit-based scheduling. */
+ if ( tx.size > netif->remaining_credit )
+ {
+ s_time_t now = NOW(), next_credit =
+ netif->credit_timeout.expires + MICROSECS(netif->credit_usec);
+ if ( next_credit <= now )
+ {
+ netif->credit_timeout.expires = now;
+ netif->remaining_credit = netif->credit_bytes;
+ }
+ else
+ {
+ netif->remaining_credit = 0;
+ netif->credit_timeout.expires = next_credit;
+ netif->credit_timeout.data = (unsigned long)netif;
+ netif->credit_timeout.function = tx_credit_callback;
+ netif->credit_timeout.cpu = smp_processor_id();
+ add_ac_timer(&netif->credit_timeout);
+ break;
+ }
+ }
+ netif->remaining_credit -= tx.size;
+#endif
+
+ netif_schedule_work(netif);
+
+ if ( unlikely(txreq.size <= PKT_PROT_LEN) ||
+ unlikely(txreq.size > ETH_FRAME_LEN) )
+ {
+ DPRINTK("Bad packet size: %d\n", txreq.size);
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ continue;
+ }
+
+ /* The packet payload must not cross a page boundary, as we cannot fragment it. */
+ if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) )
+ {
+ DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
+ txreq.addr, txreq.size,
+ (txreq.addr &~PAGE_MASK) + txreq.size);
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ continue;
+ }
+
+ pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+ if ( unlikely((skb = alloc_skb(PKT_PROT_LEN, GFP_ATOMIC)) == NULL) )
+ {
+ DPRINTK("Can't allocate a skb in start_xmit.\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ break;
+ }
+
+ mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain;
+ mcl[0].args[0] = MMAP_VADDR(pending_idx) >> PAGE_SHIFT;
+ mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL;
+ mcl[0].args[2] = 0;
+ mcl[0].args[3] = netif->domid;
+ mcl++;
+
+ ((tx_info_t *)&skb->cb[0])->idx = pending_idx;
+ ((tx_info_t *)&skb->cb[0])->netif = netif;
+ memcpy(&((tx_info_t *)&skb->cb[0])->req, &txreq, sizeof(txreq));
+ __skb_queue_tail(&tx_queue, skb);
+
+ pending_cons++;
+
+ /* Filled the batch queue? */
+ if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
+ break;
+ }
+
+ if ( mcl == tx_mcl )
+ return;
+
+ (void)HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl);
+
+ mcl = tx_mcl;
+ while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
+ {
+ pending_idx = ((tx_info_t *)&skb->cb[0])->idx;
+ netif = ((tx_info_t *)&skb->cb[0])->netif;
+ memcpy(&txreq, &((tx_info_t *)&skb->cb[0])->req, sizeof(txreq));
+
+ /* Check the remap error code. */
+ if ( unlikely(mcl[0].args[5] != 0) )
+ {
+ DPRINTK("Bad page frame\n");
+ make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+ netif_put(netif);
+ kfree_skb(skb);
+ mcl++;
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+ continue;
+ }
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
+ txreq.addr >> PAGE_SHIFT;
+
+ __skb_put(skb, PKT_PROT_LEN);
+ memcpy(skb->data,
+ (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
+ PKT_PROT_LEN);
+
+ page = virt_to_page(MMAP_VADDR(pending_idx));
+
+ /* Append the packet payload as a fragment. */
+ skb_shinfo(skb)->frags[0].page = page;
+ skb_shinfo(skb)->frags[0].size = txreq.size - PKT_PROT_LEN;
+ skb_shinfo(skb)->frags[0].page_offset =
+ (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
+ skb_shinfo(skb)->nr_frags = 1;
+ skb->data_len = txreq.size - PKT_PROT_LEN;
+ skb->len += skb->data_len;
+
+ skb->dev = netif->dev;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ /*
+ * Destructor information. We hideously abuse the 'mapping' pointer,
+ * which isn't otherwise used by us. The page deallocator is modified
+ * to interpret a non-NULL value as a destructor function to be called.
+ * This works okay because in all other cases the pointer must be NULL
+ * when the page is freed (normally Linux will explicitly bug out if
+ * it sees otherwise).
+ */
+ page->mapping = (struct address_space *)netif_page_release;
+ atomic_set(&page->count, 1);
+ pending_id[pending_idx] = txreq.id;
+ pending_netif[pending_idx] = netif;
+
+ netif->stats.tx_bytes += txreq.size;
+ netif->stats.tx_packets++;
+
+ netif_rx(skb);
+ netif->dev->last_rx = jiffies;
+
+ mcl++;
+ }
+}
+
+static void netif_page_release(struct page *page)
+{
+ unsigned long flags;
+ u16 pending_idx = page - virt_to_page(mmap_vstart);
+
+ /* Stop the abuse. */
+ page->mapping = NULL;
+
+ spin_lock_irqsave(&dealloc_lock, flags);
+ dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
+ spin_unlock_irqrestore(&dealloc_lock, flags);
+
+ tasklet_schedule(&net_tx_tasklet);
+}
+
+#if 0
+long flush_bufs_for_netif(netif_t *netif)
+{
+ NET_RING_IDX i;
+
+ /* Return any outstanding receive buffers to the guest OS. */
+ spin_lock(&netif->rx_lock);
+ for ( i = netif->rx_req_cons;
+ (i != netif->rx->req_prod) &&
+ ((i-netif->rx_resp_prod) != NETIF_RX_RING_SIZE);
+ i++ )
+ {
+ make_rx_response(netif,
+ netif->rx->ring[MASK_NETIF_RX_IDX(i)].req.id,
+ NETIF_RSP_DROPPED, 0, 0);
+ }
+ netif->rx_req_cons = i;
+ spin_unlock(&netif->rx_lock);
+
+ /*
+ * Flush pending transmit buffers. The guest may still have to wait for
+ * buffers that are queued at a physical NIC.
+ */
+ spin_lock(&netif->tx_lock);
+ for ( i = netif->tx_req_cons;
+ (i != netif->tx->req_prod) &&
+ ((i-netif->tx_resp_prod) != NETIF_TX_RING_SIZE);
+ i++ )
+ {
+ make_tx_response(netif,
+ netif->tx->ring[MASK_NETIF_TX_IDX(i)].req.id,
+ NETIF_RSP_DROPPED);
+ }
+ netif->tx_req_cons = i;
+ spin_unlock(&netif->tx_lock);
+
+ return 0;
+}
+#endif
+
+void netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+ netif_t *netif = dev_id;
+ if ( tx_work_exists(netif) )
+ {
+ add_to_net_schedule_list_tail(netif);
+ maybe_schedule_tx_action();
+ }
+}
+
+static void make_tx_response(netif_t *netif,
+ u16 id,
+ s8 st)
+{
+ NETIF_RING_IDX i = netif->tx_resp_prod;
+ netif_tx_response_t *resp;
+
+ resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
+ resp->id = id;
+ resp->status = st;
+ wmb();
+ netif->tx->resp_prod = netif->tx_resp_prod = ++i;
+
+ mb(); /* Update producer before checking event threshold. */
+ if ( i == netif->tx->event )
+ notify_via_evtchn(netif->evtchn);
+}
+
+static int make_rx_response(netif_t *netif,
+ u16 id,
+ s8 st,
+ memory_t addr,
+ u16 size)
+{
+ NETIF_RING_IDX i = netif->rx_resp_prod;
+ netif_rx_response_t *resp;
+
+ resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+ resp->addr = addr;
+ resp->id = id;
+ resp->status = (s16)size;
+ if ( st < 0 )
+ resp->status = (s16)st;
+ wmb();
+ netif->rx->resp_prod = netif->rx_resp_prod = ++i;
+
+ mb(); /* Update producer before checking event threshold. */
+ return (i == netif->rx->event);
+}
+
+static void netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct list_head *ent;
+ netif_t *netif;
+ int i = 0;
+
+ printk(KERN_ALERT "netif_schedule_list:\n");
+ spin_lock_irq(&net_schedule_list_lock);
+
+ list_for_each ( ent, &net_schedule_list )
+ {
+ netif = list_entry(ent, netif_t, list);
+ printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
+ i, netif->rx_req_cons, netif->rx_resp_prod);
+ printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
+ netif->tx_req_cons, netif->tx_resp_prod);
+ printk(KERN_ALERT " shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
+ netif->rx->req_prod, netif->rx->resp_prod);
+ printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
+ netif->rx->event, netif->tx->req_prod);
+ printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
+ netif->tx->resp_prod, netif->tx->event);
+ i++;
+ }
+
+ spin_unlock_irq(&net_schedule_list_lock);
+ printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+}
+
+static int __init init_module(void)
+{
+ int i;
+
+ if ( !(start_info.flags & SIF_NET_BE_DOMAIN) &&
+ !(start_info.flags & SIF_INITDOMAIN) )
+ return 0;
+
+ printk("Initialising Xen netif backend\n");
+
+ skb_queue_head_init(&rx_queue);
+ skb_queue_head_init(&tx_queue);
+
+ netif_interface_init();
+
+ if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
+ BUG();
+
+ pending_cons = 0;
+ pending_prod = MAX_PENDING_REQS;
+ for ( i = 0; i < MAX_PENDING_REQS; i++ )
+ pending_ring[i] = i;
+
+ spin_lock_init(&net_schedule_list_lock);
+ INIT_LIST_HEAD(&net_schedule_list);
+
+ netif_ctrlif_init();
+
+ (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
+ netif_be_dbg, SA_SHIRQ,
+ "net-be-dbg", &netif_be_dbg);
+
+ return 0;
+}
+
+static void cleanup_module(void)
+{
+ BUG();
+}
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
new file mode 100644
index 0000000000..032d02d7cc
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile
@@ -0,0 +1,3 @@
+O_TARGET := drv.o
+obj-y := main.o
+include $(TOPDIR)/Rules.make
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c
new file mode 100644
index 0000000000..4d4c579703
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c
@@ -0,0 +1,777 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/frontend/main.c
+ *
+ * Virtual network driver for XenoLinux.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#include <asm/evtchn.h>
+#include <asm/ctrl_if.h>
+
+#include <asm/page.h>
+
+#include "../netif.h"
+
+#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
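+/*
+ * Receive buffers must be page-sized and page-aligned, since whole pages are
+ * exchanged with the backend. Requesting just over half a page should make
+ * dev_alloc_skb() return a dedicated page; network_alloc_rx_buffers() panics
+ * if the buffer it gets back is not page-aligned.
+ */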
+
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
+static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
+static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+
+static struct list_head dev_list;
+
+struct net_private
+{
+ struct list_head list;
+ struct net_device *dev;
+
+ struct net_device_stats stats;
+ NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
+ unsigned int tx_full;
+
+ netif_tx_interface_t *tx;
+ netif_rx_interface_t *rx;
+
+ spinlock_t tx_lock;
+ spinlock_t rx_lock;
+
+ unsigned int handle;
+ unsigned int evtchn;
+ unsigned int irq;
+
+#define NETIF_STATE_CLOSED 0
+#define NETIF_STATE_DISCONNECTED 1
+#define NETIF_STATE_CONNECTED 2
+#define NETIF_STATE_ACTIVE 3
+ unsigned int state;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+ * array is an index into a chain of free entries.
+ */
+ struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+ struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+};
+
+/* Access macros for acquiring and freeing slots in {tx,rx}_skbs[]. */
+#define ADD_ID_TO_FREELIST(_list, _id) \
+ (_list)[(_id)] = (_list)[0]; \
+ (_list)[0] = (void *)(unsigned long)(_id);
+#define GET_ID_FROM_FREELIST(_list) \
+ ({ unsigned long _id = (unsigned long)(_list)[0]; \
+ (_list)[0] = (_list)[_id]; \
+ (unsigned short)_id; })
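+/*
+ * Entry 0 is the head of the free chain; each free slot holds the index of
+ * the next free slot, cast to a pointer. E.g. straight after network_open(),
+ * tx_skbs[0] == 1 and tx_skbs[1] == 2: GET_ID pops index 1 (leaving
+ * tx_skbs[0] == 2) and ADD_ID pushes an index back onto the head.
+ */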
+
+static struct net_device *find_dev_by_handle(unsigned int handle)
+{
+ struct list_head *ent;
+ struct net_private *np;
+ list_for_each ( ent, &dev_list )
+ {
+ np = list_entry(ent, struct net_private, list);
+ if ( np->handle == handle )
+ return np->dev;
+ }
+ return NULL;
+}
+
+
+static int network_open(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+ int i;
+
+ if ( np->state != NETIF_STATE_CONNECTED )
+ return -EINVAL;
+
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+ memset(&np->stats, 0, sizeof(np->stats));
+ spin_lock_init(&np->tx_lock);
+ spin_lock_init(&np->rx_lock);
+
+ /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+ for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
+ np->tx_skbs[i] = (void *)(i+1);
+ for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
+ np->rx_skbs[i] = (void *)(i+1);
+
+ wmb();
+ np->state = NETIF_STATE_ACTIVE;
+
+ network_alloc_rx_buffers(dev);
+ np->rx->event = np->rx_resp_cons + 1;
+
+ netif_start_queue(dev);
+
+ MOD_INC_USE_COUNT;
+
+ return 0;
+}
+
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+ NETIF_RING_IDX i, prod;
+ unsigned short id;
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+
+ do {
+ prod = np->tx->resp_prod;
+
+ for ( i = np->tx_resp_cons; i != prod; i++ )
+ {
+ id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
+ skb = np->tx_skbs[id];
+ ADD_ID_TO_FREELIST(np->tx_skbs, id);
+ dev_kfree_skb_any(skb);
+ }
+
+ np->tx_resp_cons = prod;
+
+ /*
+ * Set a new event, then check for race with update of tx_cons. Note
+ * that it is essential to schedule a callback, no matter how few
+ * buffers are pending. Even if there is space in the transmit ring,
+ * higher layers may be blocked because too much data is outstanding:
+ * in such cases notification from Xen is likely to be the only kick
+ * that we'll get.
+ */
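+ /* I.e. ask for an event once about half of the outstanding requests have completed. */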
+ np->tx->event =
+ prod + ((np->tx->req_prod - prod) >> 1) + 1;
+ mb();
+ }
+ while ( prod != np->tx->resp_prod );
+
+ if ( np->tx_full &&
+ ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
+ {
+ np->tx_full = 0;
+ if ( np->state == NETIF_STATE_ACTIVE )
+ netif_wake_queue(dev);
+ }
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+ unsigned short id;
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+ NETIF_RING_IDX i = np->rx->req_prod;
+ int nr_pfns = 0;
+
+ /* Make sure the batch is large enough to be worthwhile (1/2 ring). */
+ if ( unlikely((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) ||
+ unlikely(np->state != NETIF_STATE_ACTIVE) )
+ return;
+
+ do {
+ skb = dev_alloc_skb(RX_BUF_SIZE);
+ if ( unlikely(skb == NULL) )
+ break;
+
+ skb->dev = dev;
+
+ if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
+ panic("alloc_skb needs to provide us page-aligned buffers.");
+
+ id = GET_ID_FROM_FREELIST(np->rx_skbs);
+
+ np->rx_skbs[id] = skb;
+
+ np->rx->ring[MASK_NETIF_RX_IDX(i)].req.id = id;
+
+ rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT;
+
+ rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
+ rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
+ rx_mcl[nr_pfns].args[1] = 0;
+ rx_mcl[nr_pfns].args[2] = 0;
+
+ nr_pfns++;
+ }
+ while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE );
+
+ /*
+ * We may have allocated buffers which have entries outstanding in the page
+ * update queue -- make sure we flush those first!
+ */
+ flush_page_update_queue();
+
+ /* After all PTEs have been zapped we blow away stale TLB entries. */
+ rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB;
+
+ /* Give away a batch of pages. */
+ rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
+ rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation;
+ rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array;
+ rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns;
+
+ /* Zap PTEs and give away pages in one big multicall. */
+ (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);
+
+ /* Check return status of HYPERVISOR_dom_mem_op(). */
+ if ( rx_mcl[nr_pfns].args[5] != nr_pfns )
+ panic("Unable to reduce memory reservation\n");
+
+ np->rx->req_prod = i;
+}
+
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ unsigned short id;
+ struct net_private *np = (struct net_private *)dev->priv;
+ netif_tx_request_t *tx;
+ NETIF_RING_IDX i;
+
+ if ( unlikely(np->tx_full) )
+ {
+ printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
+ netif_stop_queue(dev);
+ return -ENOBUFS;
+ }
+
+ if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
+ PAGE_SIZE) )
+ {
+ struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
+ if ( unlikely(new_skb == NULL) )
+ return 1;
+ skb_put(new_skb, skb->len);
+ memcpy(new_skb->data, skb->data, skb->len);
+ dev_kfree_skb(skb);
+ skb = new_skb;
+ }
+
+ spin_lock_irq(&np->tx_lock);
+
+ /* if the backend isn't available then don't do anything! */
+ if ( !netif_carrier_ok(dev) )
+ {
+ spin_unlock_irq(&np->tx_lock);
+ return 1;
+ }
+
+ i = np->tx->req_prod;
+
+ id = GET_ID_FROM_FREELIST(np->tx_skbs);
+ np->tx_skbs[id] = skb;
+
+ tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
+
+ tx->id = id;
+ tx->addr = virt_to_machine(skb->data);
+ tx->size = skb->len;
+
+ wmb();
+ np->tx->req_prod = i + 1;
+
+ network_tx_buf_gc(dev);
+
+ if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) )
+ {
+ np->tx_full = 1;
+ netif_stop_queue(dev);
+ }
+
+ spin_unlock_irq(&np->tx_lock);
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+
+ /* Only notify Xen if there are no outstanding responses. */
+ mb();
+ if ( np->tx->resp_prod == i )
+ notify_via_evtchn(np->evtchn);
+
+ return 0;
+}
+
+
+static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+ struct net_device *dev = dev_id;
+ struct net_private *np = dev->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&np->tx_lock, flags);
+
+ if( !netif_carrier_ok(dev) )
+ {
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+ return;
+ }
+
+ network_tx_buf_gc(dev);
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ if ( np->rx_resp_cons != np->rx->resp_prod )
+ netif_rx_schedule(dev);
+}
+
+
+static int netif_poll(struct net_device *dev, int *pbudget)
+{
+ struct net_private *np = dev->priv;
+ struct sk_buff *skb;
+ netif_rx_response_t *rx;
+ NETIF_RING_IDX i;
+ mmu_update_t *mmu = rx_mmu;
+ multicall_entry_t *mcl = rx_mcl;
+ int work_done, budget, more_to_do = 1;
+ struct sk_buff_head rxq;
+ unsigned long flags;
+
+ spin_lock(&np->rx_lock);
+
+ /* if the device is undergoing recovery then don't do anything */
+ if ( !netif_carrier_ok(dev) )
+ {
+ spin_unlock(&np->rx_lock);
+ return 0;
+ }
+
+ skb_queue_head_init(&rxq);
+
+ if ( (budget = *pbudget) > dev->quota )
+ budget = dev->quota;
+
+ for ( i = np->rx_resp_cons, work_done = 0;
+ (i != np->rx->resp_prod) && (work_done < budget);
+ i++, work_done++ )
+ {
+ rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
+
+ skb = np->rx_skbs[rx->id];
+ ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
+
+ if ( unlikely(rx->status <= 0) )
+ {
+ /* Gate this error. We get a (valid) slew of them on suspend. */
+ if ( np->state == NETIF_STATE_ACTIVE )
+ printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
+ dev_kfree_skb(skb);
+ continue;
+ }
+
+ skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK);
+ skb_put(skb, rx->status);
+
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += rx->status;
+
+ /* Remap the page. */
+ mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
+ mmu->val = __pa(skb->head) >> PAGE_SHIFT;
+ mmu++;
+ mcl->op = __HYPERVISOR_update_va_mapping;
+ mcl->args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
+ mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
+ mcl->args[2] = 0;
+ mcl++;
+
+ phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
+ rx->addr >> PAGE_SHIFT;
+
+ __skb_queue_tail(&rxq, skb);
+ }
+
+ /* Do all the remapping work, and M->P updates, in one big hypercall. */
+ if ( likely((mcl - rx_mcl) != 0) )
+ {
+ mcl->op = __HYPERVISOR_mmu_update;
+ mcl->args[0] = (unsigned long)rx_mmu;
+ mcl->args[1] = mmu - rx_mmu;
+ mcl->args[2] = 0;
+ mcl++;
+ (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+ }
+
+ while ( (skb = __skb_dequeue(&rxq)) != NULL )
+ {
+ /* Set the shared-info area, which is hidden behind the real data. */
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
+ skb_shinfo(skb)->nr_frags = 0;
+ skb_shinfo(skb)->frag_list = NULL;
+
+ /* Ethernet-specific work. Delayed to here as it peeks the header. */
+ skb->protocol = eth_type_trans(skb, dev);
+
+ /* Pass it up. */
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ }
+
+ np->rx_resp_cons = i;
+
+ network_alloc_rx_buffers(dev);
+
+ *pbudget -= work_done;
+ dev->quota -= work_done;
+
+ if ( work_done < budget )
+ {
+ local_irq_save(flags);
+
+ np->rx->event = i + 1;
+
+ /* Deal with hypervisor racing our resetting of rx_event. */
+ mb();
+ if ( np->rx->resp_prod == i )
+ {
+ __netif_rx_complete(dev);
+ more_to_do = 0;
+ }
+
+ local_irq_restore(flags);
+ }
+
+ spin_unlock(&np->rx_lock);
+
+ return more_to_do;
+}
+
+
+static int network_close(struct net_device *dev)
+{
+ struct net_private *np = dev->priv;
+
+ netif_stop_queue(np->dev);
+
+ np->state = NETIF_STATE_CONNECTED;
+
+ /* XXX We need to properly disconnect via the domain controller. */
+ while ( /*(np->rx_resp_cons != np->rx->req_prod) ||*/
+ (np->tx_resp_cons != np->tx->req_prod) )
+ {
+ barrier();
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(1);
+ }
+
+ MOD_DEC_USE_COUNT;
+
+ return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+ struct net_private *np = (struct net_private *)dev->priv;
+ return &np->stats;
+}
+
+
+static void netif_status_change(netif_fe_interface_status_changed_t *status)
+{
+ ctrl_msg_t cmsg;
+ netif_fe_interface_connect_t up;
+ struct net_device *dev;
+ struct net_private *np;
+
+ if ( status->handle != 0 )
+ {
+ printk(KERN_WARNING "Status change on unsupported netif %d\n",
+ status->handle);
+ return;
+ }
+
+ dev = find_dev_by_handle(0);
+ np = dev->priv;
+
+ switch ( status->status )
+ {
+ case NETIF_INTERFACE_STATUS_DESTROYED:
+ printk(KERN_WARNING "Unexpected netif-DESTROYED message in state %d\n",
+ np->state);
+ break;
+
+ case NETIF_INTERFACE_STATUS_DISCONNECTED:
+ if ( np->state != NETIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected netif-DISCONNECTED message"
+ " in state %d\n", np->state);
+ printk(KERN_INFO "Attempting to reconnect network interface\n");
+
+ /* Begin interface recovery.
+ *
+ * NB. Whilst we're recovering, we turn the carrier state off. We
+ * take measures to ensure that this device isn't used for
+ * anything. We also stop the queue for this device. Various
+ * different approaches (e.g. continuing to buffer packets) have
+ * been tested but don't appear to improve the overall impact on
+ * TCP connections.
+ *
+ * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
+ * is initiated by a special "RESET" message - disconnect could
+ * just mean we're not allowed to use this interface any more.
+ */
+
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ spin_lock_irq(&np->tx_lock);
+ spin_lock(&np->rx_lock);
+ netif_stop_queue(dev);
+ netif_carrier_off(dev);
+ np->state = NETIF_STATE_DISCONNECTED;
+ spin_unlock(&np->rx_lock);
+ spin_unlock_irq(&np->tx_lock);
+
+ /* Free resources. */
+ free_irq(np->irq, dev);
+ unbind_evtchn_from_irq(np->evtchn);
+ free_page((unsigned long)np->tx);
+ free_page((unsigned long)np->rx);
+ }
+
+ /* Move from CLOSED to DISCONNECTED state. */
+ np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
+ np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
+ memset(np->tx, 0, PAGE_SIZE);
+ memset(np->rx, 0, PAGE_SIZE);
+ np->state = NETIF_STATE_DISCONNECTED;
+
+ /* Construct an interface-CONNECT message for the domain controller. */
+ cmsg.type = CMSG_NETIF_FE;
+ cmsg.subtype = CMSG_NETIF_FE_INTERFACE_CONNECT;
+ cmsg.length = sizeof(netif_fe_interface_connect_t);
+ up.handle = 0;
+ up.tx_shmem_frame = virt_to_machine(np->tx) >> PAGE_SHIFT;
+ up.rx_shmem_frame = virt_to_machine(np->rx) >> PAGE_SHIFT;
+ memcpy(cmsg.msg, &up, sizeof(up));
+
+ /* Tell the controller to bring up the interface. */
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+ break;
+
+ case NETIF_INTERFACE_STATUS_CONNECTED:
+ if ( np->state == NETIF_STATE_CLOSED )
+ {
+ printk(KERN_WARNING "Unexpected netif-CONNECTED message"
+ " in state %d\n", np->state);
+ break;
+ }
+
+ memcpy(dev->dev_addr, status->mac, ETH_ALEN);
+
+ if(netif_carrier_ok(dev))
+ np->state = NETIF_STATE_CONNECTED;
+ else
+ {
+ int i, requeue_idx;
+ netif_tx_request_t *tx;
+
+ spin_lock_irq(&np->rx_lock);
+ spin_lock(&np->tx_lock);
+
+ /* Recovery procedure: */
+
+ /* Step 1: Reinitialise variables. */
+ np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
+ np->rx->event = 1;
+
+ /* Step 2: Rebuild the RX and TX ring contents.
+ * NB. We could just free the queued TX packets now but we hope
+ * that sending them out might do some good. We have to rebuild
+ * the RX ring because some of our pages are currently flipped out
+ * so we can't just free the RX skbs.
+ * NB2. Freelist index entries are always going to be less than
+ * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
+ * greater than __PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+
+ /* Rebuild the TX buffer freelist and the TX ring itself.
+ * NB. This reorders packets. We could keep more private state
+ * to avoid this but maybe it doesn't matter so much given the
+ * interface has been down.
+ */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
+ {
+ if ( (unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET )
+ {
+ struct sk_buff *skb = np->tx_skbs[i];
+
+ tx = &np->tx->ring[requeue_idx++].req;
+
+ tx->id = i;
+ tx->addr = virt_to_machine(skb->data);
+ tx->size = skb->len;
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+ }
+ }
+ wmb();
+ np->tx->req_prod = requeue_idx;
+
+ /* Rebuild the RX buffer freelist and the RX ring itself. */
+ for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
+ if ( (unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET )
+ np->rx->ring[requeue_idx++].req.id = i;
+ wmb();
+ np->rx->req_prod = requeue_idx;
+
+ /* Step 3: All public and private state should now be sane. Get
+ * ready to start sending and receiving packets and give the driver
+ * domain a kick because we've probably just requeued some
+ * packets.
+ */
+ netif_carrier_on(dev);
+ netif_start_queue(dev);
+ np->state = NETIF_STATE_ACTIVE;
+
+ notify_via_evtchn(status->evtchn);
+
+ network_tx_buf_gc(dev);
+
+ printk(KERN_INFO "Recovery completed\n");
+
+ spin_unlock(&np->tx_lock);
+ spin_unlock_irq(&np->rx_lock);
+ }
+
+ np->evtchn = status->evtchn;
+ np->irq = bind_evtchn_to_irq(np->evtchn);
+ (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM,
+ dev->name, dev);
+ break;
+
+ default:
+ printk(KERN_WARNING "Status change to unknown value %d\n",
+ status->status);
+ break;
+ }
+}
+
+
+static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+ switch ( msg->subtype )
+ {
+ case CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED:
+ if ( msg->length != sizeof(netif_fe_interface_status_changed_t) )
+ goto parse_error;
+ netif_status_change((netif_fe_interface_status_changed_t *)
+ &msg->msg[0]);
+ break;
+ default:
+ goto parse_error;
+ }
+
+ ctrl_if_send_response(msg);
+ return;
+
+ parse_error:
+ msg->length = 0;
+ ctrl_if_send_response(msg);
+}
+
+
+static int __init init_module(void)
+{
+ ctrl_msg_t cmsg;
+ netif_fe_driver_status_changed_t st;
+ int err;
+ struct net_device *dev;
+ struct net_private *np;
+
+ if ( start_info.flags & SIF_INITDOMAIN
+ || start_info.flags & SIF_NET_BE_DOMAIN )
+ return 0;
+
+ printk("Initialising Xen virtual ethernet frontend driver");
+
+ INIT_LIST_HEAD(&dev_list);
+
+ if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL )
+ {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ np = dev->priv;
+ np->state = NETIF_STATE_CLOSED;
+ np->handle = 0;
+
+ dev->open = network_open;
+ dev->hard_start_xmit = network_start_xmit;
+ dev->stop = network_close;
+ dev->get_stats = network_get_stats;
+ dev->poll = netif_poll;
+ dev->weight = 64;
+
+ if ( (err = register_netdev(dev)) != 0 )
+ {
+ kfree(dev);
+ goto fail;
+ }
+
+ np->dev = dev;
+ list_add(&np->list, &dev_list);
+
+ (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
+ CALLBACK_IN_BLOCKING_CONTEXT);
+
+ /* Send a driver-UP notification to the domain controller. */
+ cmsg.type = CMSG_NETIF_FE;
+ cmsg.subtype = CMSG_NETIF_FE_DRIVER_STATUS_CHANGED;
+ cmsg.length = sizeof(netif_fe_driver_status_changed_t);
+ st.status = NETIF_DRIVER_STATUS_UP;
+ memcpy(cmsg.msg, &st, sizeof(st));
+
+ ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+ /*
+ * We should read 'nr_interfaces' from response message and wait
+ * for notifications before proceeding. For now we assume that we
+ * will be notified of exactly one interface.
+ */
+ while ( np->state != NETIF_STATE_CONNECTED )
+ {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+
+ return 0;
+
+ fail:
+ cleanup_module();
+ return err;
+}
+
+
+static void cleanup_module(void)
+{
+ /* XXX FIXME */
+ BUG();
+}
+
+
+module_init(init_module);
+module_exit(cleanup_module);
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h
new file mode 100644
index 0000000000..098b292612
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * netif.h
+ *
+ * Unified network-device I/O interface for Xen guest OSes.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __SHARED_NETIF_H__
+#define __SHARED_NETIF_H__
+
+typedef struct {
+ memory_t addr; /* 0: Machine address of packet. */
+ MEMORY_PADDING;
+ u16 id; /* 8: Echoed in response message. */
+ u16 size; /* 10: Packet size in bytes. */
+} PACKED netif_tx_request_t; /* 12 bytes */
+
+typedef struct {
+ u16 id; /* 0 */
+ s8 status; /* 2 */
+ u8 __pad; /* 3 */
+} PACKED netif_tx_response_t; /* 4 bytes */
+
+typedef struct {
+ u16 id; /* 0: Echoed in response message. */
+} PACKED netif_rx_request_t; /* 2 bytes */
+
+typedef struct {
+ memory_t addr; /* 0: Machine address of packet. */
+ MEMORY_PADDING;
+ u16 id; /* 8: */
+ s16 status; /* 10: -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
+} PACKED netif_rx_response_t; /* 12 bytes */
+
+/*
+ * We use a special capitalised type name because it is _essential_ that all
+ * arithmetic on indexes is done on an integer type of the correct size.
+ */
+typedef u32 NETIF_RING_IDX;
+
+/*
+ * Ring indexes are 'free running'. That is, they are not stored modulo the
+ * size of the ring buffer. The following macros convert a free-running counter
+ * into a value that can directly index a ring-buffer array.
+ */
+#define MASK_NETIF_RX_IDX(_i) ((_i)&(NETIF_RX_RING_SIZE-1))
+#define MASK_NETIF_TX_IDX(_i) ((_i)&(NETIF_TX_RING_SIZE-1))
+
+#define NETIF_TX_RING_SIZE 256
+#define NETIF_RX_RING_SIZE 256
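+/*
+ * Example: with a ring size of 256, a free-running index of 260 selects slot
+ * 260 & 255 == 4, and the number of outstanding requests is simply
+ * req_prod - resp_prod with no modulo arithmetic.
+ */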
+
+/* This structure must fit in a memory page. */
+typedef struct {
+ /*
+ * Frontend places packets into ring at tx_req_prod.
+ * Frontend receives event when tx_resp_prod passes tx_event.
+ */
+ NETIF_RING_IDX req_prod; /* 0 */
+ NETIF_RING_IDX resp_prod; /* 4 */
+ NETIF_RING_IDX event; /* 8 */
+ union { /* 12 */
+ netif_tx_request_t req;
+ netif_tx_response_t resp;
+ } PACKED ring[NETIF_TX_RING_SIZE];
+} PACKED netif_tx_interface_t;
+
+/* This structure must fit in a memory page. */
+typedef struct {
+ /*
+ * Frontend places empty buffers into ring at rx_req_prod.
+ * Frontend receives event when rx_resp_prod passes rx_event.
+ */
+ NETIF_RING_IDX req_prod; /* 0 */
+ NETIF_RING_IDX resp_prod; /* 4 */
+ NETIF_RING_IDX event; /* 8 */
+ union { /* 12 */
+ netif_rx_request_t req;
+ netif_rx_response_t resp;
+ } PACKED ring[NETIF_RX_RING_SIZE];
+} PACKED netif_rx_interface_t;
+
+/* Descriptor status values */
+#define NETIF_RSP_DROPPED -2
+#define NETIF_RSP_ERROR -1
+#define NETIF_RSP_OKAY 0
+
+#endif