diff options
Diffstat (limited to 'linux-2.4.26-xen-sparse/arch/xen/drivers/netif')
9 files changed, 2118 insertions, 0 deletions
diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile new file mode 100644 index 0000000000..20c8192d3d --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/Makefile @@ -0,0 +1,10 @@ + +O_TARGET := drv.o + +subdir-y += frontend +obj-y += frontend/drv.o + +subdir-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend +obj-$(CONFIG_XEN_PHYSDEV_ACCESS) += backend/drv.o + +include $(TOPDIR)/Rules.make diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile new file mode 100644 index 0000000000..9ffb0bd702 --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := main.o control.o interface.o +include $(TOPDIR)/Rules.make diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h new file mode 100644 index 0000000000..88881cdf66 --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/common.h @@ -0,0 +1,96 @@ +/****************************************************************************** + * arch/xen/drivers/netif/backend/common.h + */ + +#ifndef __NETIF__BACKEND__COMMON_H__ +#define __NETIF__BACKEND__COMMON_H__ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <asm/ctrl_if.h> +#include <asm/io.h> +#include "../netif.h" +#include "../../../../../net/bridge/br_private.h" + +#ifndef NDEBUG +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#define DPRINTK(_f, _a...) 
printk("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define ASSERT(_p) ((void)0) +#define DPRINTK(_f, _a...) ((void)0) +#endif + +typedef struct netif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + + /* Physical parameters of the comms window. */ + unsigned long tx_shmem_frame; + unsigned long rx_shmem_frame; + unsigned int evtchn; + int irq; + + /* The shared rings and indexes. */ + netif_tx_interface_t *tx; + netif_rx_interface_t *rx; + + /* Private indexes into shared ring. */ + NETIF_RING_IDX rx_req_cons; + NETIF_RING_IDX rx_resp_prod; /* private version of shared variable */ + NETIF_RING_IDX tx_req_cons; + NETIF_RING_IDX tx_resp_prod; /* private version of shared variable */ + + /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ + unsigned long credit_bytes; + unsigned long credit_usec; + unsigned long remaining_credit; + struct timer_list credit_timeout; + + /* Miscellaneous private stuff. */ + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; + /* + * DISCONNECT response is deferred until pending requests are ack'ed. + * We therefore need to store the id from the original request. 
+ */ + u8 disconnect_rspid; + struct netif_st *hash_next; + struct list_head list; /* scheduling list */ + atomic_t refcnt; + spinlock_t rx_lock, tx_lock; + struct net_device *dev; + struct net_device_stats stats; +} netif_t; + +void netif_create(netif_be_create_t *create); +void netif_destroy(netif_be_destroy_t *destroy); +void netif_connect(netif_be_connect_t *connect); +int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id); +void __netif_disconnect_complete(netif_t *netif); +netif_t *netif_find_by_handle(domid_t domid, unsigned int handle); +#define netif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define netif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + __netif_disconnect_complete(_b); \ + } while (0) + +void netif_interface_init(void); +void netif_ctrlif_init(void); + +void netif_deschedule(netif_t *netif); + +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); +struct net_device_stats *netif_be_get_stats(struct net_device *dev); +void netif_be_int(int irq, void *dev_id, struct pt_regs *regs); + +#endif /* __NETIF__BACKEND__COMMON_H__ */ diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c new file mode 100644 index 0000000000..cf1b075031 --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/control.c @@ -0,0 +1,65 @@ +/****************************************************************************** + * arch/xen/drivers/netif/backend/control.c + * + * Routines for interfacing with the control plane. 
+ * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" + +static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->subtype ) + { + case CMSG_NETIF_BE_CREATE: + if ( msg->length != sizeof(netif_be_create_t) ) + goto parse_error; + netif_create((netif_be_create_t *)&msg->msg[0]); + break; + case CMSG_NETIF_BE_DESTROY: + if ( msg->length != sizeof(netif_be_destroy_t) ) + goto parse_error; + netif_destroy((netif_be_destroy_t *)&msg->msg[0]); + break; + case CMSG_NETIF_BE_CONNECT: + if ( msg->length != sizeof(netif_be_connect_t) ) + goto parse_error; + netif_connect((netif_be_connect_t *)&msg->msg[0]); + break; + case CMSG_NETIF_BE_DISCONNECT: + if ( msg->length != sizeof(netif_be_disconnect_t) ) + goto parse_error; + if ( !netif_disconnect((netif_be_disconnect_t *)&msg->msg[0],msg->id) ) + return; /* Sending the response is deferred until later. */ + break; + default: + goto parse_error; + } + + ctrl_if_send_response(msg); + return; + + parse_error: + DPRINTK("Parse error while reading message subtype %d, len %d\n", + msg->subtype, msg->length); + msg->length = 0; + ctrl_if_send_response(msg); +} + +void netif_ctrlif_init(void) +{ + ctrl_msg_t cmsg; + netif_be_driver_status_changed_t st; + + (void)ctrl_if_register_receiver(CMSG_NETIF_BE, netif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP notification to the domain controller. 
*/ + cmsg.type = CMSG_NETIF_BE; + cmsg.subtype = CMSG_NETIF_BE_DRIVER_STATUS_CHANGED; + cmsg.length = sizeof(netif_be_driver_status_changed_t); + st.status = NETIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &st, sizeof(st)); + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); +} diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c new file mode 100644 index 0000000000..5a2da3d29b --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/interface.c @@ -0,0 +1,304 @@ +/****************************************************************************** + * arch/xen/drivers/netif/backend/interface.c + * + * Network-device interface management. + * + * Copyright (c) 2004, Keir Fraser + */ + +#include "common.h" +#include <linux/rtnetlink.h> + +#define NETIF_HASHSZ 1024 +#define NETIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(NETIF_HASHSZ-1)) + +static netif_t *netif_hash[NETIF_HASHSZ]; +static struct net_device *bridge_dev; +static struct net_bridge *bridge_br; + +netif_t *netif_find_by_handle(domid_t domid, unsigned int handle) +{ + netif_t *netif = netif_hash[NETIF_HASH(domid, handle)]; + while ( (netif != NULL) && + ((netif->domid != domid) || (netif->handle != handle)) ) + netif = netif->hash_next; + return netif; +} + +void __netif_disconnect_complete(netif_t *netif) +{ + ctrl_msg_t cmsg; + netif_be_disconnect_t disc; + + /* + * These can't be done in netif_disconnect() because at that point there + * may be outstanding transmit requests whose asynchronous responses + * must still be notified to the remote driver. + */ + unbind_evtchn_from_irq(netif->evtchn); + vfree(netif->tx); /* Frees netif->rx as well. */ + rtnl_lock(); + (void)br_del_if(bridge_br, netif->dev); + (void)dev_close(netif->dev); + rtnl_unlock(); + + /* Construct the deferred response message. 
*/ + cmsg.type = CMSG_NETIF_BE; + cmsg.subtype = CMSG_NETIF_BE_DISCONNECT; + cmsg.id = netif->disconnect_rspid; + cmsg.length = sizeof(netif_be_disconnect_t); + disc.domid = netif->domid; + disc.netif_handle = netif->handle; + disc.status = NETIF_BE_STATUS_OKAY; + memcpy(cmsg.msg, &disc, sizeof(disc)); + + /* + * Make sure message is constructed /before/ status change, because + * after the status change the 'netif' structure could be deallocated at + * any time. Also make sure we send the response /after/ status change, + * as otherwise a subsequent CONNECT request could spuriously fail if + * another CPU doesn't see the status change yet. + */ + mb(); + if ( netif->status != DISCONNECTING ) + BUG(); + netif->status = DISCONNECTED; + mb(); + + /* Send the successful response. */ + ctrl_if_send_response(&cmsg); +} + +void netif_create(netif_be_create_t *create) +{ + domid_t domid = create->domid; + unsigned int handle = create->netif_handle; + struct net_device *dev; + netif_t **pnetif, *netif; + char name[IFNAMSIZ]; + + snprintf(name, IFNAMSIZ, "vif%u.%u", domid, handle); + dev = alloc_netdev(sizeof(netif_t), name, ether_setup); + if ( dev == NULL ) + { + DPRINTK("Could not create netif: out of memory\n"); + create->status = NETIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + netif = dev->priv; + memset(netif, 0, sizeof(*netif)); + netif->domid = domid; + netif->handle = handle; + netif->status = DISCONNECTED; + spin_lock_init(&netif->rx_lock); + spin_lock_init(&netif->tx_lock); + atomic_set(&netif->refcnt, 0); + netif->dev = dev; + + netif->credit_bytes = netif->remaining_credit = ~0UL; + netif->credit_usec = 0UL; + /*init_ac_timer(&new_vif->credit_timeout);*/ + + pnetif = &netif_hash[NETIF_HASH(domid, handle)]; + while ( *pnetif != NULL ) + { + if ( ((*pnetif)->domid == domid) && ((*pnetif)->handle == handle) ) + { + DPRINTK("Could not create netif: already exists\n"); + create->status = NETIF_BE_STATUS_INTERFACE_EXISTS; + kfree(dev); + return; + } + pnetif = 
&(*pnetif)->hash_next; + } + + dev->hard_start_xmit = netif_be_start_xmit; + dev->get_stats = netif_be_get_stats; + memcpy(dev->dev_addr, create->mac, ETH_ALEN); + + /* Disable queuing. */ + dev->tx_queue_len = 0; + + /* XXX In bridge mode we should force a different MAC from remote end. */ + dev->dev_addr[2] ^= 1; + + if ( register_netdev(dev) != 0 ) + { + DPRINTK("Could not register new net device\n"); + create->status = NETIF_BE_STATUS_OUT_OF_MEMORY; + kfree(dev); + return; + } + + netif->hash_next = *pnetif; + *pnetif = netif; + + DPRINTK("Successfully created netif\n"); + create->status = NETIF_BE_STATUS_OKAY; +} + +void netif_destroy(netif_be_destroy_t *destroy) +{ + domid_t domid = destroy->domid; + unsigned int handle = destroy->netif_handle; + netif_t **pnetif, *netif; + + pnetif = &netif_hash[NETIF_HASH(domid, handle)]; + while ( (netif = *pnetif) != NULL ) + { + if ( (netif->domid == domid) && (netif->handle == handle) ) + { + if ( netif->status != DISCONNECTED ) + goto still_connected; + goto destroy; + } + pnetif = &netif->hash_next; + } + + destroy->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + + still_connected: + destroy->status = NETIF_BE_STATUS_INTERFACE_CONNECTED; + return; + + destroy: + *pnetif = netif->hash_next; + unregister_netdev(netif->dev); + kfree(netif->dev); + destroy->status = NETIF_BE_STATUS_OKAY; +} + +void netif_connect(netif_be_connect_t *connect) +{ + domid_t domid = connect->domid; + unsigned int handle = connect->netif_handle; + unsigned int evtchn = connect->evtchn; + unsigned long tx_shmem_frame = connect->tx_shmem_frame; + unsigned long rx_shmem_frame = connect->rx_shmem_frame; + struct vm_struct *vma; + pgprot_t prot; + int error; + netif_t *netif; + struct net_device *eth0_dev; + + netif = netif_find_by_handle(domid, handle); + if ( unlikely(netif == NULL) ) + { + DPRINTK("netif_connect attempted for non-existent netif (%u,%u)\n", + connect->domid, connect->netif_handle); + connect->status = 
NETIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + } + + if ( netif->status != DISCONNECTED ) + { + connect->status = NETIF_BE_STATUS_INTERFACE_CONNECTED; + return; + } + + if ( (vma = get_vm_area(2*PAGE_SIZE, VM_IOREMAP)) == NULL ) + { + connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY; + return; + } + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); + error = direct_remap_area_pages(&init_mm, + VMALLOC_VMADDR(vma->addr), + tx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE, + prot, domid); + error |= direct_remap_area_pages(&init_mm, + VMALLOC_VMADDR(vma->addr) + PAGE_SIZE, + rx_shmem_frame<<PAGE_SHIFT, PAGE_SIZE, + prot, domid); + if ( error != 0 ) + { + if ( error == -ENOMEM ) + connect->status = NETIF_BE_STATUS_OUT_OF_MEMORY; + else if ( error == -EFAULT ) + connect->status = NETIF_BE_STATUS_MAPPING_ERROR; + else + connect->status = NETIF_BE_STATUS_ERROR; + vfree(vma->addr); + return; + } + + netif->evtchn = evtchn; + netif->irq = bind_evtchn_to_irq(evtchn); + netif->tx_shmem_frame = tx_shmem_frame; + netif->rx_shmem_frame = rx_shmem_frame; + netif->tx = + (netif_tx_interface_t *)vma->addr; + netif->rx = + (netif_rx_interface_t *)((char *)vma->addr + PAGE_SIZE); + netif->status = CONNECTED; + netif_get(netif); + + rtnl_lock(); + + (void)dev_open(netif->dev); + (void)br_add_if(bridge_br, netif->dev); + + /* + * The default config is a very simple binding to eth0. + * If eth0 is being used as an IP interface by this OS then someone + * must add eth0's IP address to nbe-br, and change the routing table + * to refer to nbe-br instead of eth0. 
+ */ + (void)dev_open(bridge_dev); + if ( (eth0_dev = __dev_get_by_name("eth0")) != NULL ) + { + (void)dev_open(eth0_dev); + (void)br_add_if(bridge_br, eth0_dev); + } + + rtnl_unlock(); + + (void)request_irq(netif->irq, netif_be_int, 0, netif->dev->name, netif); + netif_start_queue(netif->dev); + + connect->status = NETIF_BE_STATUS_OKAY; +} + +int netif_disconnect(netif_be_disconnect_t *disconnect, u8 rsp_id) +{ + domid_t domid = disconnect->domid; + unsigned int handle = disconnect->netif_handle; + netif_t *netif; + + netif = netif_find_by_handle(domid, handle); + if ( unlikely(netif == NULL) ) + { + DPRINTK("netif_disconnect attempted for non-existent netif" + " (%u,%u)\n", disconnect->domid, disconnect->netif_handle); + disconnect->status = NETIF_BE_STATUS_INTERFACE_NOT_FOUND; + return 1; /* Caller will send response error message. */ + } + + if ( netif->status == CONNECTED ) + { + netif->status = DISCONNECTING; + netif->disconnect_rspid = rsp_id; + wmb(); /* Let other CPUs see the status change. */ + netif_stop_queue(netif->dev); + free_irq(netif->irq, NULL); + netif_deschedule(netif); + netif_put(netif); + } + + return 0; /* Caller should not send response message. 
*/ +} + +void netif_interface_init(void) +{ + memset(netif_hash, 0, sizeof(netif_hash)); + if ( br_add_bridge("nbe-br") != 0 ) + BUG(); + bridge_dev = __dev_get_by_name("nbe-br"); + bridge_br = (struct net_bridge *)bridge_dev->priv; + bridge_br->bridge_hello_time = bridge_br->hello_time = 0; + bridge_br->bridge_forward_delay = bridge_br->forward_delay = 0; + bridge_br->stp_enabled = 0; +} diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c new file mode 100644 index 0000000000..2c4e5cb211 --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/backend/main.c @@ -0,0 +1,772 @@ +/****************************************************************************** + * arch/xen/drivers/netif/backend/main.c + * + * Back-end of the driver for virtual network devices. This portion of the + * driver exports a 'unified' network-device interface that can be accessed + * by any operating system that implements a compatible front end. 
A + * reference front-end implementation can be found in: + * arch/xen/drivers/netif/frontend + * + * Copyright (c) 2002-2004, K A Fraser + */ + +#include "common.h" + +static void netif_page_release(struct page *page); +static void make_tx_response(netif_t *netif, + u16 id, + s8 st); +static int make_rx_response(netif_t *netif, + u16 id, + s8 st, + memory_t addr, + u16 size); + +static void net_tx_action(unsigned long unused); +static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); + +static void net_rx_action(unsigned long unused); +static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0); + +typedef struct { + u16 id; + unsigned long old_mach_ptr; + unsigned long new_mach_pfn; + netif_t *netif; +} rx_info_t; +static struct sk_buff_head rx_queue; +static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2]; +static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3]; +static unsigned char rx_notify[NR_EVENT_CHANNELS]; + +/* Don't currently gate addition of an interface to the tx scheduling list. */ +#define tx_work_exists(_if) (1) + +#define MAX_PENDING_REQS 256 +static unsigned long mmap_vstart; +#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + +#define PKT_PROT_LEN (ETH_HLEN + 20) + +static u16 pending_id[MAX_PENDING_REQS]; +static netif_t *pending_netif[MAX_PENDING_REQS]; +static u16 pending_ring[MAX_PENDING_REQS]; +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +/* Freed TX SKBs get batched on this ring before return to pending_ring. 
*/ +static u16 dealloc_ring[MAX_PENDING_REQS]; +static spinlock_t dealloc_lock = SPIN_LOCK_UNLOCKED; +static PEND_RING_IDX dealloc_prod, dealloc_cons; + +typedef struct { + u16 idx; + netif_tx_request_t req; + netif_t *netif; +} tx_info_t; +static struct sk_buff_head tx_queue; +static multicall_entry_t tx_mcl[MAX_PENDING_REQS]; + +static struct list_head net_schedule_list; +static spinlock_t net_schedule_list_lock; + +#define MAX_MFN_ALLOC 64 +static unsigned long mfn_list[MAX_MFN_ALLOC]; +static unsigned int alloc_index = 0; +static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED; + +static void __refresh_mfn_list(void) +{ + int ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, + mfn_list, MAX_MFN_ALLOC); + if ( unlikely(ret != MAX_MFN_ALLOC) ) + { + printk(KERN_ALERT "Unable to increase memory reservation (%d)\n", ret); + BUG(); + } + alloc_index = MAX_MFN_ALLOC; +} + +static unsigned long get_new_mfn(void) +{ + unsigned long mfn, flags; + spin_lock_irqsave(&mfn_lock, flags); + if ( alloc_index == 0 ) + __refresh_mfn_list(); + mfn = mfn_list[--alloc_index]; + spin_unlock_irqrestore(&mfn_lock, flags); + return mfn; +} + +static void dealloc_mfn(unsigned long mfn) +{ + unsigned long flags; + spin_lock_irqsave(&mfn_lock, flags); + if ( alloc_index != MAX_MFN_ALLOC ) + mfn_list[alloc_index++] = mfn; + else + (void)HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, &mfn, 1); + spin_unlock_irqrestore(&mfn_lock, flags); +} + +static inline void maybe_schedule_tx_action(void) +{ + smp_mb(); + if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && + !list_empty(&net_schedule_list) ) + tasklet_schedule(&net_tx_tasklet); +} + +/* + * This is the primary RECEIVE function for a network interface. + * Note that, from the p.o.v. of /this/ OS it looks like a transmit. + */ +int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + netif_t *netif = (netif_t *)dev->priv; + + /* Drop the packet if the target domain has no receive buffers. 
*/ + if ( (netif->rx_req_cons == netif->rx->req_prod) || + ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) ) + goto drop; + + /* + * We do not copy the packet unless: + * 1. The data is shared; or + * 2. It spans a page boundary; or + * 3. We cannot be sure the whole data page is allocated. + * The copying method is taken from skb_copy(). + * NB. We also couldn't cope with fragmented packets, but we won't get + * any because we do not advertise the NETIF_F_SG feature. + */ + if ( skb_shared(skb) || skb_cloned(skb) || + (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) || + ((skb->end - skb->head) < (PAGE_SIZE/2)) ) + { + struct sk_buff *nskb = alloc_skb(PAGE_SIZE-1024, GFP_ATOMIC); + int hlen = skb->data - skb->head; + if ( unlikely(nskb == NULL) ) + goto drop; + skb_reserve(nskb, hlen); + __skb_put(nskb, skb->len); + (void)skb_copy_bits(skb, -hlen, nskb->head, hlen + skb->len); + dev_kfree_skb(skb); + skb = nskb; + } + + ((rx_info_t *)&skb->cb[0])->id = + netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_req_cons++)].req.id; + ((rx_info_t *)&skb->cb[0])->netif = netif; + + __skb_queue_tail(&rx_queue, skb); + tasklet_schedule(&net_rx_tasklet); + + return 0; + + drop: + netif->stats.rx_dropped++; + dev_kfree_skb(skb); + return 0; +} + +#if 0 +static void xen_network_done_notify(void) +{ + static struct net_device *eth0_dev = NULL; + if ( unlikely(eth0_dev == NULL) ) + eth0_dev = __dev_get_by_name("eth0"); + netif_rx_schedule(eth0_dev); +} +/* + * Add following to poll() function in NAPI driver (Tigon3 is example): + * if ( xen_network_done() ) + * tg3_enable_ints(tp); + */ +int xen_network_done(void) +{ + return skb_queue_empty(&rx_queue); +} +#endif + +static void net_rx_action(unsigned long unused) +{ + netif_t *netif; + s8 status; + u16 size, id, evtchn; + mmu_update_t *mmu = rx_mmu; + multicall_entry_t *mcl; + unsigned long vdata, mdata, new_mfn; + struct sk_buff_head rxq; + struct sk_buff *skb; + u16 notify_list[NETIF_RX_RING_SIZE]; + 
int notify_nr = 0; + + skb_queue_head_init(&rxq); + + mcl = rx_mcl; + while ( (skb = __skb_dequeue(&rx_queue)) != NULL ) + { + netif = ((rx_info_t *)&skb->cb[0])->netif; + vdata = (unsigned long)skb->data; + mdata = virt_to_machine(vdata); + new_mfn = get_new_mfn(); + + mmu[0].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu[0].val = __pa(vdata) >> PAGE_SHIFT; + mmu[1].val = (unsigned long)(netif->domid<<16) & ~0xFFFFUL; + mmu[1].ptr = (unsigned long)(netif->domid<< 0) & ~0xFFFFUL; + mmu[1].ptr |= MMU_EXTENDED_COMMAND; + mmu[1].val |= MMUEXT_SET_SUBJECTDOM; + mmu[2].ptr = (mdata & PAGE_MASK) | MMU_EXTENDED_COMMAND; + mmu[2].val = MMUEXT_REASSIGN_PAGE; + + mcl[0].op = __HYPERVISOR_update_va_mapping; + mcl[0].args[0] = vdata >> PAGE_SHIFT; + mcl[0].args[1] = (new_mfn << PAGE_SHIFT) | __PAGE_KERNEL; + mcl[0].args[2] = 0; + mcl[1].op = __HYPERVISOR_mmu_update; + mcl[1].args[0] = (unsigned long)mmu; + mcl[1].args[1] = 3; + mcl[1].args[2] = 0; + + mmu += 3; + mcl += 2; + + ((rx_info_t *)&skb->cb[0])->old_mach_ptr = mdata; + ((rx_info_t *)&skb->cb[0])->new_mach_pfn = new_mfn; + __skb_queue_tail(&rxq, skb); + + /* Filled the batch queue? */ + if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) ) + break; + } + + if ( mcl == rx_mcl ) + return; + + mcl[-2].args[2] = UVMF_FLUSH_TLB; + (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); + + mcl = rx_mcl; + while ( (skb = __skb_dequeue(&rxq)) != NULL ) + { + netif = ((rx_info_t *)&skb->cb[0])->netif; + size = skb->tail - skb->data; + id = ((rx_info_t *)&skb->cb[0])->id; + new_mfn = ((rx_info_t *)&skb->cb[0])->new_mach_pfn; + mdata = ((rx_info_t *)&skb->cb[0])->old_mach_ptr; + + /* Check the reassignment error code. 
*/ + if ( unlikely(mcl[1].args[5] != 0) ) + { + DPRINTK("Failed MMU update transferring to DOM%u\n", + netif->domid); + (void)HYPERVISOR_update_va_mapping( + (unsigned long)skb->head >> PAGE_SHIFT, + (pte_t) { (mdata & PAGE_MASK) | __PAGE_KERNEL }, + UVMF_INVLPG); + dealloc_mfn(new_mfn); + status = NETIF_RSP_ERROR; + } + else + { + phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn; + + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + netif->stats.rx_bytes += size; + netif->stats.rx_packets++; + + status = NETIF_RSP_OKAY; + } + + evtchn = netif->evtchn; + if ( make_rx_response(netif, id, status, mdata, size) && + (rx_notify[evtchn] == 0) ) + { + rx_notify[evtchn] = 1; + notify_list[notify_nr++] = evtchn; + } + + dev_kfree_skb(skb); + + mcl += 2; + } + + while ( notify_nr != 0 ) + { + evtchn = notify_list[--notify_nr]; + rx_notify[evtchn] = 0; + notify_via_evtchn(evtchn); + } + + /* More work to do? */ + if ( !skb_queue_empty(&rx_queue) ) + tasklet_schedule(&net_rx_tasklet); +#if 0 + else + xen_network_done_notify(); +#endif +} + +struct net_device_stats *netif_be_get_stats(struct net_device *dev) +{ + netif_t *netif = dev->priv; + return &netif->stats; +} + +static int __on_net_schedule_list(netif_t *netif) +{ + return netif->list.next != NULL; +} + +static void remove_from_net_schedule_list(netif_t *netif) +{ + spin_lock_irq(&net_schedule_list_lock); + if ( likely(__on_net_schedule_list(netif)) ) + { + list_del(&netif->list); + netif->list.next = NULL; + netif_put(netif); + } + spin_unlock_irq(&net_schedule_list_lock); +} + +static void add_to_net_schedule_list_tail(netif_t *netif) +{ + if ( __on_net_schedule_list(netif) ) + return; + + spin_lock_irq(&net_schedule_list_lock); + if ( !__on_net_schedule_list(netif) && (netif->status == CONNECTED) ) + { + list_add_tail(&netif->list, &net_schedule_list); + netif_get(netif); + } + spin_unlock_irq(&net_schedule_list_lock); +} + +static 
inline void netif_schedule_work(netif_t *netif) +{ + if ( (netif->tx_req_cons != netif->tx->req_prod) && + ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) ) + { + add_to_net_schedule_list_tail(netif); + maybe_schedule_tx_action(); + } +} + +void netif_deschedule(netif_t *netif) +{ + remove_from_net_schedule_list(netif); +} + +#if 0 +static void tx_credit_callback(unsigned long data) +{ + netif_t *netif = (netif_t *)data; + netif->remaining_credit = netif->credit_bytes; + netif_schedule_work(netif); +} +#endif + +static void net_tx_action(unsigned long unused) +{ + struct list_head *ent; + struct sk_buff *skb; + netif_t *netif; + netif_tx_request_t txreq; + u16 pending_idx; + NETIF_RING_IDX i, dp; + struct page *page; + multicall_entry_t *mcl; + + if ( (i = dealloc_cons) == (dp = dealloc_prod) ) /* Snapshot the producer once: netif_page_release() may advance it at any time. */ + goto skip_dealloc; + + mcl = tx_mcl; + while ( i != dp ) + { + pending_idx = dealloc_ring[MASK_PEND_IDX(i++)]; + mcl[0].op = __HYPERVISOR_update_va_mapping; + mcl[0].args[0] = MMAP_VADDR(pending_idx) >> PAGE_SHIFT; + mcl[0].args[1] = 0; + mcl[0].args[2] = 0; + mcl++; + } + + mcl[-1].args[2] = UVMF_FLUSH_TLB; + (void)HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl); + + while ( dealloc_cons != dp ) /* Retire only the entries whose mappings were zapped above. */ + { + pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; + + netif = pending_netif[pending_idx]; + + spin_lock(&netif->tx_lock); + make_tx_response(netif, pending_id[pending_idx], NETIF_RSP_OKAY); + spin_unlock(&netif->tx_lock); + + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + + /* + * Scheduling checks must happen after the above response is posted. + * This avoids a possible race with a guest OS on another CPU. 
+ */ + mb(); + if ( (netif->tx_req_cons != netif->tx->req_prod) && + ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) ) + add_to_net_schedule_list_tail(netif); + + netif_put(netif); + } + + skip_dealloc: + mcl = tx_mcl; + while ( (NR_PENDING_REQS < MAX_PENDING_REQS) && + !list_empty(&net_schedule_list) ) + { + /* Get a netif from the list with work to do. */ + ent = net_schedule_list.next; + netif = list_entry(ent, netif_t, list); + netif_get(netif); + remove_from_net_schedule_list(netif); + + /* Work to do? */ + i = netif->tx_req_cons; + if ( (i == netif->tx->req_prod) || + ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) ) + { + netif_put(netif); + continue; + } + memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req, + sizeof(txreq)); + netif->tx_req_cons++; + +#if 0 + /* Credit-based scheduling. */ + if ( tx.size > netif->remaining_credit ) + { + s_time_t now = NOW(), next_credit = + netif->credit_timeout.expires + MICROSECS(netif->credit_usec); + if ( next_credit <= now ) + { + netif->credit_timeout.expires = now; + netif->remaining_credit = netif->credit_bytes; + } + else + { + netif->remaining_credit = 0; + netif->credit_timeout.expires = next_credit; + netif->credit_timeout.data = (unsigned long)netif; + netif->credit_timeout.function = tx_credit_callback; + netif->credit_timeout.cpu = smp_processor_id(); + add_ac_timer(&netif->credit_timeout); + break; + } + } + netif->remaining_credit -= tx.size; +#endif + + netif_schedule_work(netif); + + if ( unlikely(txreq.size <= PKT_PROT_LEN) || + unlikely(txreq.size > ETH_FRAME_LEN) ) + { + DPRINTK("Bad packet size: %d\n", txreq.size); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + continue; + } + + /* No crossing a page boundary as the payload mustn't fragment. 
*/ + if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) ) + { + DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n", + txreq.addr, txreq.size, + (txreq.addr &~PAGE_MASK) + txreq.size); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + continue; + } + + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + + if ( unlikely((skb = alloc_skb(PKT_PROT_LEN, GFP_ATOMIC)) == NULL) ) + { + DPRINTK("Can't allocate a skb in start_xmit.\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + break; + } + + mcl[0].op = __HYPERVISOR_update_va_mapping_otherdomain; + mcl[0].args[0] = MMAP_VADDR(pending_idx) >> PAGE_SHIFT; + mcl[0].args[1] = (txreq.addr & PAGE_MASK) | __PAGE_KERNEL; + mcl[0].args[2] = 0; + mcl[0].args[3] = netif->domid; + mcl++; + + ((tx_info_t *)&skb->cb[0])->idx = pending_idx; + ((tx_info_t *)&skb->cb[0])->netif = netif; + memcpy(&((tx_info_t *)&skb->cb[0])->req, &txreq, sizeof(txreq)); + __skb_queue_tail(&tx_queue, skb); + + pending_cons++; + + /* Filled the batch queue? */ + if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) ) + break; + } + + if ( mcl == tx_mcl ) + return; + + (void)HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl); + + mcl = tx_mcl; + while ( (skb = __skb_dequeue(&tx_queue)) != NULL ) + { + pending_idx = ((tx_info_t *)&skb->cb[0])->idx; + netif = ((tx_info_t *)&skb->cb[0])->netif; + memcpy(&txreq, &((tx_info_t *)&skb->cb[0])->req, sizeof(txreq)); + + /* Check the remap error code. 
*/ + if ( unlikely(mcl[0].args[5] != 0) ) + { + DPRINTK("Bad page frame\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + kfree_skb(skb); + mcl++; + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + continue; + } + + phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] = + txreq.addr >> PAGE_SHIFT; + + __skb_put(skb, PKT_PROT_LEN); + memcpy(skb->data, + (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)), + PKT_PROT_LEN); + + page = virt_to_page(MMAP_VADDR(pending_idx)); + + /* Append the packet payload as a fragment. */ + skb_shinfo(skb)->frags[0].page = page; + skb_shinfo(skb)->frags[0].size = txreq.size - PKT_PROT_LEN; + skb_shinfo(skb)->frags[0].page_offset = + (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK; + skb_shinfo(skb)->nr_frags = 1; + skb->data_len = txreq.size - PKT_PROT_LEN; + skb->len += skb->data_len; + + skb->dev = netif->dev; + skb->protocol = eth_type_trans(skb, skb->dev); + + /* + * Destructor information. We hideously abuse the 'mapping' pointer, + * which isn't otherwise used by us. The page deallocator is modified + * to interpret a non-NULL value as a destructor function to be called. + * This works okay because in all other cases the pointer must be NULL + * when the page is freed (normally Linux will explicitly bug out if + * it sees otherwise). + */ + page->mapping = (struct address_space *)netif_page_release; + atomic_set(&page->count, 1); + pending_id[pending_idx] = txreq.id; + pending_netif[pending_idx] = netif; + + netif->stats.tx_bytes += txreq.size; + netif->stats.tx_packets++; + + netif_rx(skb); + netif->dev->last_rx = jiffies; + + mcl++; + } +} + +static void netif_page_release(struct page *page) +{ + unsigned long flags; + u16 pending_idx = page - virt_to_page(mmap_vstart); + + /* Stop the abuse. 
*/ + page->mapping = NULL; + + spin_lock_irqsave(&dealloc_lock, flags); + dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; + spin_unlock_irqrestore(&dealloc_lock, flags); + + tasklet_schedule(&net_tx_tasklet); +} + +#if 0 +long flush_bufs_for_netif(netif_t *netif) +{ + NET_RING_IDX i; + + /* Return any outstanding receive buffers to the guest OS. */ + spin_lock(&netif->rx_lock); + for ( i = netif->rx_req_cons; + (i != netif->rx->req_prod) && + ((i-netif->rx_resp_prod) != NETIF_RX_RING_SIZE); + i++ ) + { + make_rx_response(netif, + netif->rx->ring[MASK_NETIF_RX_IDX(i)].req.id, + NETIF_RSP_DROPPED, 0, 0); + } + netif->rx_req_cons = i; + spin_unlock(&netif->rx_lock); + + /* + * Flush pending transmit buffers. The guest may still have to wait for + * buffers that are queued at a physical NIC. + */ + spin_lock(&netif->tx_lock); + for ( i = netif->tx_req_cons; + (i != netif->tx->req_prod) && + ((i-netif->tx_resp_prod) != NETIF_TX_RING_SIZE); + i++ ) + { + make_tx_response(netif, + netif->tx->ring[MASK_NETIF_TX_IDX(i)].req.id, + NETIF_RSP_DROPPED); + } + netif->tx_req_cons = i; + spin_unlock(&netif->tx_lock); + + return 0; +} +#endif + +void netif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + netif_t *netif = dev_id; + if ( tx_work_exists(netif) ) + { + add_to_net_schedule_list_tail(netif); + maybe_schedule_tx_action(); + } +} + +static void make_tx_response(netif_t *netif, + u16 id, + s8 st) +{ + NET_RING_IDX i = netif->tx_resp_prod; + netif_tx_response_t *resp; + + resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp; + resp->id = id; + resp->status = st; + wmb(); + netif->tx->resp_prod = netif->tx_resp_prod = ++i; + + mb(); /* Update producer before checking event threshold. 
*/ + if ( i == netif->tx->event ) + notify_via_evtchn(netif->evtchn); +} + +static int make_rx_response(netif_t *netif, + u16 id, + s8 st, + memory_t addr, + u16 size) +{ + NET_RING_IDX i = netif->rx_resp_prod; + netif_rx_response_t *resp; + + resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp; + resp->addr = addr; + resp->id = id; + resp->status = (s16)size; + if ( st < 0 ) + resp->status = (s16)st; + wmb(); + netif->rx->resp_prod = netif->rx_resp_prod = ++i; + + mb(); /* Update producer before checking event threshold. */ + return (i == netif->rx->event); +} + +static void netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) +{ + struct list_head *ent; + netif_t *netif; + int i = 0; + + printk(KERN_ALERT "netif_schedule_list:\n"); + spin_lock_irq(&net_schedule_list_lock); + + list_for_each ( ent, &net_schedule_list ) + { + netif = list_entry(ent, netif_t, list); + printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n", + i, netif->rx_req_cons, netif->rx_resp_prod); + printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n", + netif->tx_req_cons, netif->tx_resp_prod); + printk(KERN_ALERT " shared(rx_req_prod=%08x rx_resp_prod=%08x\n", + netif->rx->req_prod, netif->rx->resp_prod); + printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n", + netif->rx->event, netif->tx->req_prod); + printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n", + netif->tx->resp_prod, netif->tx->event); + i++; + } + + spin_unlock_irq(&net_schedule_list_lock); + printk(KERN_ALERT " ** End of netif_schedule_list **\n"); +} + +static int __init init_module(void) +{ + int i; + + if ( !(start_info.flags & SIF_NET_BE_DOMAIN) && + !(start_info.flags & SIF_INITDOMAIN) ) + return 0; + + printk("Initialising Xen netif backend\n"); + + skb_queue_head_init(&rx_queue); + skb_queue_head_init(&tx_queue); + + netif_interface_init(); + + if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 ) + BUG(); + + pending_cons = 0; + pending_prod = MAX_PENDING_REQS; + for ( 
i = 0; i < MAX_PENDING_REQS; i++ ) + pending_ring[i] = i; + + spin_lock_init(&net_schedule_list_lock); + INIT_LIST_HEAD(&net_schedule_list); + + netif_ctrlif_init(); + + (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG), + netif_be_dbg, SA_SHIRQ, + "net-be-dbg", &netif_be_dbg); + + return 0; +} + +static void cleanup_module(void) +{ + BUG(); +} + +module_init(init_module); +module_exit(cleanup_module); diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile new file mode 100644 index 0000000000..032d02d7cc --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/Makefile @@ -0,0 +1,3 @@ +O_TARGET := drv.o +obj-y := main.o +include $(TOPDIR)/Rules.make diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c new file mode 100644 index 0000000000..4d4c579703 --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/frontend/main.c @@ -0,0 +1,777 @@ +/****************************************************************************** + * arch/xen/drivers/netif/frontend/main.c + * + * Virtual network driver for XenoLinux. 
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+#include <asm/evtchn.h>
+#include <asm/ctrl_if.h>
+
+#include <asm/page.h>
+
+#include "../netif.h"
+
+#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
+
+static void network_tx_buf_gc(struct net_device *dev);
+static void network_alloc_rx_buffers(struct net_device *dev);
+static void cleanup_module(void);
+
+/*
+ * Scratch arrays for batching hypercalls on the receive path. They are
+ * file-scope (shared by every device); rx_mcl has one extra entry for the
+ * trailing non-per-buffer operation appended to each multicall.
+ */
+static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
+static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
+static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
+
+/* List of all net_private instances, linked through net_private.list. */
+static struct list_head dev_list;
+
+/* Per-interface private state, stored in net_device->priv. */
+struct net_private
+{
+    struct list_head list;          /* Entry on dev_list. */
+    struct net_device *dev;         /* Back-pointer to the owning device. */
+
+    struct net_device_stats stats;
+    NETIF_RING_IDX rx_resp_cons, tx_resp_cons; /* Private consumer indexes. */
+    unsigned int tx_full;           /* Non-zero while the tx ring is full. */
+
+    netif_tx_interface_t *tx;       /* Shared transmit ring page. */
+    netif_rx_interface_t *rx;       /* Shared receive ring page. */
+
+    spinlock_t tx_lock;
+    spinlock_t rx_lock;
+
+    unsigned int handle;            /* Interface handle used in control msgs. */
+    unsigned int evtchn;            /* Event channel to the backend. */
+    unsigned int irq;               /* IRQ bound to evtchn. */
+
+#define NETIF_STATE_CLOSED 0
+#define NETIF_STATE_DISCONNECTED 1
+#define NETIF_STATE_CONNECTED 2
+#define NETIF_STATE_ACTIVE 3
+    unsigned int state;             /* One of NETIF_STATE_*. */
+
+    /*
+     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+     * array is an index into a chain of free entries.
+     */
+    struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
+    struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
+};
+
+/* Access macros for acquiring and freeing slots in {tx,rx}_skbs[].
*/ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + (unsigned short)_id; }) + +static struct net_device *find_dev_by_handle(unsigned int handle) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + if ( np->handle == handle ) + return np->dev; + } + return NULL; +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + int i; + + if ( np->state != NETIF_STATE_CONNECTED ) + return -EINVAL; + + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + memset(&np->stats, 0, sizeof(np->stats)); + spin_lock_init(&np->tx_lock); + spin_lock_init(&np->rx_lock); + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); + + wmb(); + np->state = NETIF_STATE_ACTIVE; + + network_alloc_rx_buffers(dev); + np->rx->event = np->rx_resp_cons + 1; + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + NETIF_RING_IDX i, prod; + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + + do { + prod = np->tx->resp_prod; + + for ( i = np->tx_resp_cons; i != prod; i++ ) + { + id = np->tx->ring[MASK_NET_TX_IDX(i)].resp.id; + skb = np->tx_skbs[id]; + ADD_ID_TO_FREELIST(np->tx_skbs, id); + dev_kfree_skb_any(skb); + } + + np->tx_resp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. Note + * that it is essential to schedule a callback, no matter how few + * buffers are pending. 
Even if there is space in the transmit ring, + * higher layers may be blocked because too much data is outstanding: + * in such cases notification from Xen is likely to be the only kick + * that we'll get. + */ + np->tx->event = + prod + ((np->tx->req_prod - prod) >> 1) + 1; + mb(); + } + while ( prod != np->tx->resp_prod ); + + if ( np->tx_full && + ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) ) + { + np->tx_full = 0; + if ( np->state == NETIF_STATE_ACTIVE ) + netif_wake_queue(dev); + } +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + NETIF_RING_IDX i = np->rx->req_prod; + int nr_pfns = 0; + + /* Make sure the batch is large enough to be worthwhile (1/2 ring). */ + if ( unlikely((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) || + unlikely(np->state != NETIF_STATE_ACTIVE) ) + return; + + do { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(skb == NULL) ) + break; + + skb->dev = dev; + + if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) + panic("alloc_skb needs to provide us page-aligned buffers."); + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + + np->rx_skbs[id] = skb; + + np->rx->ring[MASK_NET_RX_IDX(i)].req.id = id; + + rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT; + + rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping; + rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT; + rx_mcl[nr_pfns].args[1] = 0; + rx_mcl[nr_pfns].args[2] = 0; + + nr_pfns++; + } + while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE ); + + /* + * We may have allocated buffers which have entries outstanding in the page + * update queue -- make sure we flush those first! + */ + flush_page_update_queue(); + + /* After all PTEs have been zapped we blow away stale TLB entries. */ + rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB; + + /* Give away a batch of pages. 
*/ + rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op; + rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation; + rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array; + rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns; + + /* Zap PTEs and give away pages in one big multicall. */ + (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1); + + /* Check return status of HYPERVISOR_dom_mem_op(). */ + if ( rx_mcl[nr_pfns].args[5] != nr_pfns ) + panic("Unable to reduce memory reservation\n"); + + np->rx->req_prod = i; +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct net_private *np = (struct net_private *)dev->priv; + netif_tx_request_t *tx; + NETIF_RING_IDX i; + + if ( unlikely(np->tx_full) ) + { + printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= + PAGE_SIZE) ) + { + struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(new_skb == NULL) ) + return 1; + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + dev_kfree_skb(skb); + skb = new_skb; + } + + spin_lock_irq(&np->tx_lock); + + /* if the backend isn't available then don't do anything! */ + if ( !netif_carrier_ok(dev) ) + { + spin_unlock_irq(&np->tx_lock); + return 1; + } + + i = np->tx->req_prod; + + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + tx = &np->tx->ring[MASK_NET_TX_IDX(i)].req; + + tx->id = id; + tx->addr = virt_to_machine(skb->data); + tx->size = skb->len; + + wmb(); + np->tx->req_prod = i + 1; + + network_tx_buf_gc(dev); + + if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) ) + { + np->tx_full = 1; + netif_stop_queue(dev); + } + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + /* Only notify Xen if there are no outstanding responses. 
*/ + mb(); + if ( np->tx->resp_prod == i ) + notify_via_evtchn(np->evtchn); + + return 0; +} + + +static void netif_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + struct net_device *dev = dev_id; + struct net_private *np = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&np->tx_lock, flags); + + if( !netif_carrier_ok(dev) ) + { + spin_unlock_irqrestore(&np->tx_lock, flags); + return; + } + + network_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); + + if ( np->rx_resp_cons != np->rx->resp_prod ) + netif_rx_schedule(dev); +} + + +static int netif_poll(struct net_device *dev, int *pbudget) +{ + struct net_private *np = dev->priv; + struct sk_buff *skb; + netif_rx_response_t *rx; + NETIF_RING_IDX i; + mmu_update_t *mmu = rx_mmu; + multicall_entry_t *mcl = rx_mcl; + int work_done, budget, more_to_do = 1; + struct sk_buff_head rxq; + unsigned long flags; + + spin_lock(&np->rx_lock); + + /* if the device is undergoing recovery then don't do anything */ + if ( !netif_carrier_ok(dev) ) + { + spin_unlock(&np->rx_lock); + return 0; + } + + skb_queue_head_init(&rxq); + + if ( (budget = *pbudget) > dev->quota ) + budget = dev->quota; + + for ( i = np->rx_resp_cons, work_done = 0; + (i != np->rx->resp_prod) && (work_done < budget); + i++, work_done++ ) + { + rx = &np->rx->ring[MASK_NET_RX_IDX(i)].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); + + if ( unlikely(rx->status <= 0) ) + { + /* Gate this error. We get a (valid) slew of them on suspend. */ + if ( np->state == NETIF_STATE_ACTIVE ) + printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); + dev_kfree_skb(skb); + continue; + } + + skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK); + skb_put(skb, rx->status); + + np->stats.rx_packets++; + np->stats.rx_bytes += rx->status; + + /* Remap the page. 
*/ + mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE; + mmu->val = __pa(skb->head) >> PAGE_SHIFT; + mmu++; + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = (unsigned long)skb->head >> PAGE_SHIFT; + mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL; + mcl->args[2] = 0; + mcl++; + + phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = + rx->addr >> PAGE_SHIFT; + + __skb_queue_tail(&rxq, skb); + } + + /* Do all the remapping work, and M->P updates, in one big hypercall. */ + if ( likely((mcl - rx_mcl) != 0) ) + { + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)rx_mmu; + mcl->args[1] = mmu - rx_mmu; + mcl->args[2] = 0; + mcl++; + (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); + } + + while ( (skb = __skb_dequeue(&rxq)) != NULL ) + { + /* Set the shared-info area, which is hidden behind the real data. */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + /* Ethernet-specific work. Delayed to here as it peeks the header. */ + skb->protocol = eth_type_trans(skb, dev); + + /* Pass it up. */ + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_resp_cons = i; + + network_alloc_rx_buffers(dev); + + *pbudget -= work_done; + dev->quota -= work_done; + + if ( work_done < budget ) + { + local_irq_save(flags); + + np->rx->event = i + 1; + + /* Deal with hypervisor racing our resetting of rx_event. */ + mb(); + if ( np->rx->resp_prod == i ) + { + __netif_rx_complete(dev); + more_to_do = 0; + } + + local_irq_restore(flags); + } + + spin_unlock(&np->rx_lock); + + return more_to_do; +} + + +static int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + netif_stop_queue(np->dev); + + np->state = NETIF_STATE_CONNECTED; + + /* XXX We need to properly disconnect via the domain controller. 
*/ + while ( /*(np->rx_resp_cons != np->rx->req_prod) ||*/ + (np->tx_resp_cons != np->tx->req_prod) ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +static void netif_status_change(netif_fe_interface_status_changed_t *status) +{ + ctrl_msg_t cmsg; + netif_fe_interface_connect_t up; + struct net_device *dev; + struct net_private *np; + + if ( status->handle != 0 ) + { + printk(KERN_WARNING "Status change on unsupported netif %d\n", + status->handle); + return; + } + + dev = find_dev_by_handle(0); + np = dev->priv; + + switch ( status->status ) + { + case NETIF_INTERFACE_STATUS_DESTROYED: + printk(KERN_WARNING "Unexpected netif-DESTROYED message in state %d\n", + np->state); + break; + + case NETIF_INTERFACE_STATUS_DISCONNECTED: + if ( np->state != NETIF_STATE_CLOSED ) + { + printk(KERN_WARNING "Unexpected netif-DISCONNECTED message" + " in state %d\n", np->state); + printk(KERN_INFO "Attempting to reconnect network interface\n"); + + /* Begin interface recovery. + * + * NB. Whilst we're recovering, we turn the carrier state off. We + * take measures to ensure that this device isn't used for + * anything. We also stop the queue for this device. Various + * different approaches (e.g. continuing to buffer packets) have + * been tested but don't appear to improve the overall impact on + * TCP connections. + * + * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery + * is initiated by a special "RESET" message - disconnect could + * just mean we're not allowed to use this interface any more. + */ + + /* Stop old i/f to prevent errors whilst we rebuild the state. 
*/ + spin_lock_irq(&np->tx_lock); + spin_lock(&np->rx_lock); + netif_stop_queue(dev); + netif_carrier_off(dev); + np->state = NETIF_STATE_DISCONNECTED; + spin_unlock(&np->rx_lock); + spin_unlock_irq(&np->tx_lock); + + /* Free resources. */ + free_irq(np->irq, dev); + unbind_evtchn_from_irq(np->evtchn); + free_page((unsigned long)np->tx); + free_page((unsigned long)np->rx); + } + + /* Move from CLOSED to DISCONNECTED state. */ + np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL); + np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL); + memset(np->tx, 0, PAGE_SIZE); + memset(np->rx, 0, PAGE_SIZE); + np->state = NETIF_STATE_DISCONNECTED; + + /* Construct an interface-CONNECT message for the domain controller. */ + cmsg.type = CMSG_NETIF_FE; + cmsg.subtype = CMSG_NETIF_FE_INTERFACE_CONNECT; + cmsg.length = sizeof(netif_fe_interface_connect_t); + up.handle = 0; + up.tx_shmem_frame = virt_to_machine(np->tx) >> PAGE_SHIFT; + up.rx_shmem_frame = virt_to_machine(np->rx) >> PAGE_SHIFT; + memcpy(cmsg.msg, &up, sizeof(up)); + + /* Tell the controller to bring up the interface. */ + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + break; + + case NETIF_INTERFACE_STATUS_CONNECTED: + if ( np->state == NETIF_STATE_CLOSED ) + { + printk(KERN_WARNING "Unexpected netif-CONNECTED message" + " in state %d\n", np->state); + break; + } + + memcpy(dev->dev_addr, status->mac, ETH_ALEN); + + if(netif_carrier_ok(dev)) + np->state = NETIF_STATE_CONNECTED; + else + { + int i, requeue_idx; + netif_tx_request_t *tx; + + spin_lock_irq(&np->rx_lock); + spin_lock(&np->tx_lock); + + /* Recovery procedure: */ + + /* Step 1: Reinitialise variables. */ + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + np->rx->event = 1; + + /* Step 2: Rebuild the RX and TX ring contents. + * NB. We could just free the queued TX packets now but we hope + * that sending them out might do some good. 
We have to rebuild + * the RX ring because some of our pages are currently flipped out + * so we can't just free the RX skbs. + * NB2. Freelist index entries are always going to be less than + * __PAGE_OFFSET, whereas pointers to skbs will always be equal or + * greater than __PAGE_OFFSET: we use this property to distinguish + * them. + */ + + /* Rebuild the TX buffer freelist and the TX ring itself. + * NB. This reorders packets. We could keep more private state + * to avoid this but maybe it doesn't matter so much given the + * interface has been down. + */ + for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ ) + { + if ( (unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET ) + { + struct sk_buff *skb = np->tx_skbs[i]; + + tx = &np->tx->ring[requeue_idx++].req; + + tx->id = i; + tx->addr = virt_to_machine(skb->data); + tx->size = skb->len; + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + } + } + wmb(); + np->tx->req_prod = requeue_idx; + + /* Rebuild the RX buffer freelist and the RX ring itself. */ + for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ ) + if ( (unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET ) + np->rx->ring[requeue_idx++].req.id = i; + wmb(); + np->rx->req_prod = requeue_idx; + + /* Step 3: All public and private state should now be sane. Get + * ready to start sending and receiving packets and give the driver + * domain a kick because we've probably just requeued some + * packets. 
+ */ + netif_carrier_on(dev); + netif_start_queue(dev); + np->state = NETIF_STATE_ACTIVE; + + notify_via_evtchn(status->evtchn); + + network_tx_buf_gc(dev); + + printk(KERN_INFO "Recovery completed\n"); + + spin_unlock(&np->tx_lock); + spin_unlock_irq(&np->rx_lock); + } + + np->evtchn = status->evtchn; + np->irq = bind_evtchn_to_irq(np->evtchn); + (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, + dev->name, dev); + break; + + default: + printk(KERN_WARNING "Status change to unknown value %d\n", + status->status); + break; + } +} + + +static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) +{ + switch ( msg->subtype ) + { + case CMSG_NETIF_FE_INTERFACE_STATUS_CHANGED: + if ( msg->length != sizeof(netif_fe_interface_status_changed_t) ) + goto parse_error; + netif_status_change((netif_fe_interface_status_changed_t *) + &msg->msg[0]); + break; + default: + goto parse_error; + } + + ctrl_if_send_response(msg); + return; + + parse_error: + msg->length = 0; + ctrl_if_send_response(msg); +} + + +static int __init init_module(void) +{ + ctrl_msg_t cmsg; + netif_fe_driver_status_changed_t st; + int err; + struct net_device *dev; + struct net_private *np; + + if ( start_info.flags & SIF_INITDOMAIN + || start_info.flags & SIF_NET_BE_DOMAIN ) + return 0; + + printk("Initialising Xen virtual ethernet frontend driver"); + + INIT_LIST_HEAD(&dev_list); + + if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->state = NETIF_STATE_CLOSED; + np->handle = 0; + + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + dev->poll = netif_poll; + dev->weight = 64; + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + + (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx, + CALLBACK_IN_BLOCKING_CONTEXT); + + /* Send a driver-UP 
notification to the domain controller. */ + cmsg.type = CMSG_NETIF_FE; + cmsg.subtype = CMSG_NETIF_FE_DRIVER_STATUS_CHANGED; + cmsg.length = sizeof(netif_fe_driver_status_changed_t); + st.status = NETIF_DRIVER_STATUS_UP; + memcpy(cmsg.msg, &st, sizeof(st)); + + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); + + /* + * We should read 'nr_interfaces' from response message and wait + * for notifications before proceeding. For now we assume that we + * will be notified of exactly one interface. + */ + while ( np->state != NETIF_STATE_CONNECTED ) + { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + /* XXX FIXME */ + BUG(); +} + + +module_init(init_module); +module_exit(cleanup_module); diff --git a/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h new file mode 100644 index 0000000000..098b292612 --- /dev/null +++ b/linux-2.4.26-xen-sparse/arch/xen/drivers/netif/netif.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __SHARED_NETIF_H__ +#define __SHARED_NETIF_H__ + +typedef struct { + memory_t addr; /* 0: Machine address of packet. */ + MEMORY_PADDING; + u16 id; /* 8: Echoed in response message. */ + u16 size; /* 10: Packet size in bytes. */ +} PACKED netif_tx_request_t; /* 12 bytes */ + +typedef struct { + u16 id; /* 0 */ + s8 status; /* 2 */ + u8 __pad; /* 3 */ +} PACKED netif_tx_response_t; /* 4 bytes */ + +typedef struct { + u16 id; /* 0: Echoed in response message. */ +} PACKED netif_rx_request_t; /* 2 bytes */ + +typedef struct { + memory_t addr; /* 0: Machine address of packet. */ + MEMORY_PADDING; + u16 id; /* 8: */ + s16 status; /* 10: -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. 
*/ +} PACKED netif_rx_response_t; /* 12 bytes */ + +/* + * We use a special capitalised type name because it is _essential_ that all + * arithmetic on indexes is done on an integer type of the correct size. + */ +typedef u32 NETIF_RING_IDX; + +/* + * Ring indexes are 'free running'. That is, they are not stored modulo the + * size of the ring buffer. The following macros convert a free-running counter + * into a value that can directly index a ring-buffer array. + */ +#define MASK_NETIF_RX_IDX(_i) ((_i)&(NETIF_RX_RING_SIZE-1)) +#define MASK_NETIF_TX_IDX(_i) ((_i)&(NETIF_TX_RING_SIZE-1)) + +#define NETIF_TX_RING_SIZE 256 +#define NETIF_RX_RING_SIZE 256 + +/* This structure must fit in a memory page. */ +typedef struct { + /* + * Frontend places packets into ring at tx_req_prod. + * Frontend receives event when tx_resp_prod passes tx_event. + */ + NETIF_RING_IDX req_prod; /* 0 */ + NETIF_RING_IDX resp_prod; /* 4 */ + NETIF_RING_IDX event; /* 8 */ + union { /* 12 */ + netif_tx_request_t req; + netif_tx_response_t resp; + } PACKED ring[NETIF_TX_RING_SIZE]; +} PACKED netif_tx_interface_t; + +/* This structure must fit in a memory page. */ +typedef struct { + /* + * Frontend places empty buffers into ring at rx_req_prod. + * Frontend receives event when rx_resp_prod passes rx_event. + */ + NETIF_RING_IDX req_prod; /* 0 */ + NETIF_RING_IDX resp_prod; /* 4 */ + NETIF_RING_IDX event; /* 8 */ + union { /* 12 */ + netif_rx_request_t req; + netif_rx_response_t resp; + } PACKED ring[NETIF_RX_RING_SIZE]; +} PACKED netif_rx_interface_t; + +/* Descriptor status values */ +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 + +#endif |