diff options
Diffstat (limited to 'xenolinux-2.4.25-sparse/arch/xeno/drivers/network')
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile | 3 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c | 632 |
2 files changed, 635 insertions, 0 deletions
diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile new file mode 100644 index 0000000000..b44a288a5b --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile @@ -0,0 +1,3 @@ +O_TARGET := net.o +obj-y := network.o +include $(TOPDIR)/Rules.make diff --git a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c new file mode 100644 index 0000000000..29f7c2224f --- /dev/null +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c @@ -0,0 +1,632 @@ +/****************************************************************************** + * network.c + * + * Virtual network driver for XenoLinux. + * + * Copyright (c) 2002-2003, K A Fraser + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> + +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <net/sock.h> +#include <net/pkt_sched.h> + +#define NET_IRQ _EVENT_NET + +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */ + +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs); +static void network_tx_buf_gc(struct net_device *dev); +static void network_alloc_rx_buffers(struct net_device *dev); +static void cleanup_module(void); + +static struct list_head dev_list; + +struct net_private +{ + struct list_head list; + struct net_device *dev; + + struct net_device_stats stats; + NET_RING_IDX rx_resp_cons, tx_resp_cons; + unsigned int net_ring_fixmap_idx, tx_full; + net_ring_t *net_ring; + net_idx_t *net_idx; + spinlock_t tx_lock; + unsigned int idx; /* Domain-specific index of this VIF. */ + + unsigned int rx_bufs_to_notify; + +#define STATE_ACTIVE 0 +#define STATE_SUSPENDED 1 +#define STATE_CLOSED 2 + unsigned int state; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. + */ + struct sk_buff *tx_skbs[TX_RING_SIZE+1]; + struct sk_buff *rx_skbs[RX_RING_SIZE+1]; +}; + +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */ +#define ADD_ID_TO_FREELIST(_list, _id) \ + (_list)[(_id)] = (_list)[0]; \ + (_list)[0] = (void *)(unsigned long)(_id); +#define GET_ID_FROM_FREELIST(_list) \ + ({ unsigned long _id = (unsigned long)(_list)[0]; \ + (_list)[0] = (_list)[_id]; \ + (unsigned short)_id; }) + + +static void _dbg_network_int(struct net_device *dev) +{ + struct net_private *np = dev->priv; + + if ( np->state == STATE_CLOSED ) + return; + + printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x," + " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x," + " tx_event=0x%08x, state=%d\n", + np->tx_full, np->tx_resp_cons, + np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, + np->net_idx->tx_event, + test_bit(__LINK_STATE_XOFF, &dev->state)); + printk(KERN_ALERT "net: rx_resp_cons=0x%08x," + " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n", + np->rx_resp_cons, np->net_idx->rx_req_prod, + np->net_idx->rx_resp_prod, np->net_idx->rx_event); +} + + +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _dbg_network_int(np->dev); + } +} + + +static int network_open(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + int i, ret; + + netop.cmd = NETOP_RESET_RINGS; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n"); + return ret; + } + + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = np->idx; + if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 ) + { + printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx); + return ret; + } + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, + netop.u.get_vif_info.ring_mfn << PAGE_SHIFT); + np->net_ring = (net_ring_t *)fix_to_virt( + FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + np->net_idx = &HYPERVISOR_shared_info->net_idx[np->idx]; + + np->rx_bufs_to_notify = 0; + np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0; + memset(&np->stats, 0, sizeof(np->stats)); + spin_lock_init(&np->tx_lock); + memset(np->net_ring, 0, sizeof(*np->net_ring)); + memset(np->net_idx, 0, sizeof(*np->net_idx)); + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */ + for ( i = 0; i <= TX_RING_SIZE; i++ ) + np->tx_skbs[i] = (void *)(i+1); + for ( i = 0; i <= RX_RING_SIZE; i++ ) + np->rx_skbs[i] = (void *)(i+1); + + wmb(); + np->state = STATE_ACTIVE; + + network_alloc_rx_buffers(dev); + + netif_start_queue(dev); + + MOD_INC_USE_COUNT; + + return 0; +} + + +static void network_tx_buf_gc(struct net_device *dev) +{ + NET_RING_IDX i, prod; + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + tx_entry_t *tx_ring = np->net_ring->tx_ring; + + do { + prod = np->net_idx->tx_resp_prod; + + for ( i = np->tx_resp_cons; i != prod; i++ ) + { + id = tx_ring[MASK_NET_TX_IDX(i)].resp.id; + skb = np->tx_skbs[id]; + ADD_ID_TO_FREELIST(np->tx_skbs, id); + dev_kfree_skb_any(skb); + } + + np->tx_resp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. Note + * that it is essential to schedule a callback, no matter how few + * buffers are pending. Even if there is space in the transmit ring, + * higher layers may be blocked because too much data is outstanding: + * in such cases notification from Xen is likely to be the only kick + * that we'll get. + */ + np->net_idx->tx_event = + prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1; + mb(); + } + while ( prod != np->net_idx->tx_resp_prod ); + + if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) ) + { + np->tx_full = 0; + if ( np->state == STATE_ACTIVE ) + netif_wake_queue(dev); + } +} + + +static inline pte_t *get_ppte(void *addr) +{ + pgd_t *pgd; pmd_t *pmd; pte_t *pte; + pgd = pgd_offset_k( (unsigned long)addr); + pmd = pmd_offset(pgd, (unsigned long)addr); + pte = pte_offset(pmd, (unsigned long)addr); + return pte; +} + + +static void network_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct net_private *np = dev->priv; + struct sk_buff *skb; + netop_t netop; + NET_RING_IDX i = np->net_idx->rx_req_prod; + + if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || + unlikely(np->state != STATE_ACTIVE) ) + return; + + do { + skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(skb == NULL) ) + break; + + skb->dev = dev; + + if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) ) + panic("alloc_skb needs to provide us page-aligned buffers."); + + id = GET_ID_FROM_FREELIST(np->rx_skbs); + np->rx_skbs[id] = skb; + + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id = id; + np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = + virt_to_machine(get_ppte(skb->head)); + + np->rx_bufs_to_notify++; + } + while ( (++i - np->rx_resp_cons) != RX_RING_SIZE ); + + /* + * We may have allocated buffers which have entries outstanding in the page + * update queue -- make sure we flush those first! + */ + flush_page_update_queue(); + + np->net_idx->rx_req_prod = i; + np->net_idx->rx_event = np->rx_resp_cons + 1; + + /* Batch Xen notifications. */ + if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + np->rx_bufs_to_notify = 0; + } +} + + +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct net_private *np = (struct net_private *)dev->priv; + tx_req_entry_t *tx; + netop_t netop; + NET_RING_IDX i; + + if ( unlikely(np->tx_full) ) + { + printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name); + netif_stop_queue(dev); + return -ENOBUFS; + } + + if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= + PAGE_SIZE) ) + { + struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE); + if ( unlikely(new_skb == NULL) ) + return 1; + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + dev_kfree_skb(skb); + skb = new_skb; + } + + spin_lock_irq(&np->tx_lock); + + i = np->net_idx->tx_req_prod; + + id = GET_ID_FROM_FREELIST(np->tx_skbs); + np->tx_skbs[id] = skb; + + tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req; + + tx->id = id; + tx->addr = phys_to_machine(virt_to_phys(skb->data)); + tx->size = skb->len; + + wmb(); + np->net_idx->tx_req_prod = i + 1; + + network_tx_buf_gc(dev); + + if ( (i - np->tx_resp_cons) == (TX_RING_SIZE - 1) ) + { + np->tx_full = 1; + netif_stop_queue(dev); + } + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + /* Only notify Xen if there are no outstanding responses. */ + mb(); + if ( np->net_idx->tx_resp_prod == i ) + { + netop.cmd = NETOP_PUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + } + + return 0; +} + + +static inline void _network_interrupt(struct net_device *dev) +{ + struct net_private *np = dev->priv; + unsigned long flags; + struct sk_buff *skb; + rx_resp_entry_t *rx; + NET_RING_IDX i; + + if ( unlikely(np->state == STATE_CLOSED) ) + return; + + spin_lock_irqsave(&np->tx_lock, flags); + network_tx_buf_gc(dev); + spin_unlock_irqrestore(&np->tx_lock, flags); + + again: + for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ ) + { + rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp; + + skb = np->rx_skbs[rx->id]; + ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); + + if ( unlikely(rx->status != RING_STATUS_OK) ) + { + /* Gate this error. We get a (valid) slew of them on suspend. */ + if ( np->state == STATE_ACTIVE ) + printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); + dev_kfree_skb_any(skb); + continue; + } + + /* + * Set up shinfo -- from alloc_skb This was particularily nasty: the + * shared info is hidden at the back of the data area (presumably so it + * can be shared), but on page flip it gets very spunked. + */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; + + skb->data = skb->tail = skb->head + rx->offset; + skb_put(skb, rx->size); + skb->protocol = eth_type_trans(skb, dev); + + np->stats.rx_packets++; + + np->stats.rx_bytes += rx->size; + netif_rx(skb); + dev->last_rx = jiffies; + } + + np->rx_resp_cons = i; + + network_alloc_rx_buffers(dev); + + /* Deal with hypervisor racing our resetting of rx_event. */ + mb(); + if ( np->net_idx->rx_resp_prod != i ) + goto again; +} + + +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs) +{ + struct list_head *ent; + struct net_private *np; + list_for_each ( ent, &dev_list ) + { + np = list_entry(ent, struct net_private, list); + _network_interrupt(np->dev); + } +} + + +int network_close(struct net_device *dev) +{ + struct net_private *np = dev->priv; + netop_t netop; + + np->state = STATE_SUSPENDED; + wmb(); + + netif_stop_queue(np->dev); + + netop.cmd = NETOP_FLUSH_BUFFERS; + netop.vif = np->idx; + (void)HYPERVISOR_net_io_op(&netop); + + while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) || + (np->tx_resp_cons != np->net_idx->tx_req_prod) ) + { + barrier(); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } + + wmb(); + np->state = STATE_CLOSED; + wmb(); + + /* Now no longer safe to take interrupts for this device. */ + clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx); + + MOD_DEC_USE_COUNT; + + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct net_private *np = (struct net_private *)dev->priv; + return &np->stats; +} + + +/* + * This notifier is installed for domain 0 only. + * All other domains have VFR rules installed on their behalf by domain 0 + * when they are created. For bootstrap, Xen creates wildcard rules for + * domain 0 -- this notifier is used to detect when we find our proper + * IP address, so we can poke down proper rules and remove the wildcards. + */ +static int inetdev_notify(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct list_head *ent; + struct net_private *np; + int idx = -1; + network_op_t op; + + list_for_each ( ent, &dev_list ) + { + np = list_entry(dev_list.next, struct net_private, list); + if ( np->dev == dev ) + idx = np->idx; + } + + if ( idx == -1 ) + goto out; + + memset(&op, 0, sizeof(op)); + op.u.net_rule.proto = NETWORK_PROTO_ANY; + op.u.net_rule.action = NETWORK_ACTION_ACCEPT; + + if ( event == NETDEV_UP ) + op.cmd = NETWORK_OP_ADDRULE; + else if ( event == NETDEV_DOWN ) + op.cmd = NETWORK_OP_DELETERULE; + else + goto out; + + op.u.net_rule.src_dom = 0; + op.u.net_rule.src_idx = idx; + op.u.net_rule.dst_dom = VIF_SPECIAL; + op.u.net_rule.dst_idx = VIF_PHYSICAL_INTERFACE; + op.u.net_rule.src_addr = ntohl(ifa->ifa_address); + op.u.net_rule.src_addr_mask = ~0UL; + op.u.net_rule.dst_addr = 0; + op.u.net_rule.dst_addr_mask = 0; + (void)HYPERVISOR_network_op(&op); + + op.u.net_rule.src_dom = VIF_SPECIAL; + op.u.net_rule.src_idx = VIF_ANY_INTERFACE; + op.u.net_rule.dst_dom = 0; + op.u.net_rule.dst_idx = idx; + op.u.net_rule.src_addr = 0; + op.u.net_rule.src_addr_mask = 0; + op.u.net_rule.dst_addr = ntohl(ifa->ifa_address); + op.u.net_rule.dst_addr_mask = ~0UL; + (void)HYPERVISOR_network_op(&op); + + out: + return NOTIFY_DONE; +} + +static struct notifier_block notifier_inetdev = { + .notifier_call = inetdev_notify, + .next = NULL, + .priority = 0 +}; + + +int __init init_module(void) +{ + int i, fixmap_idx=-1, err; + struct net_device *dev; + struct net_private *np; + netop_t netop; + + INIT_LIST_HEAD(&dev_list); + + /* + * Domain 0 must poke its own network rules as it discovers its IP + * addresses. All other domains have a privileged "parent" to do this for + * them at start of day. + */ + if ( start_info.flags & SIF_INITDOMAIN ) + (void)register_inetaddr_notifier(¬ifier_inetdev); + + err = request_irq(NET_IRQ, network_interrupt, + SA_SAMPLE_RANDOM, "network", NULL); + if ( err ) + { + printk(KERN_WARNING "Could not allocate network interrupt\n"); + goto fail; + } + + err = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ, "net_dbg", + &dbg_network_int); + if ( err ) + printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); + + for ( i = 0; i < MAX_DOMAIN_VIFS; i++ ) + { + /* If the VIF is invalid then the query hypercall will fail. */ + netop.cmd = NETOP_GET_VIF_INFO; + netop.vif = i; + if ( HYPERVISOR_net_io_op(&netop) != 0 ) + continue; + + /* We actually only support up to 4 vifs right now. */ + if ( ++fixmap_idx == 4 ) + break; + + dev = alloc_etherdev(sizeof(struct net_private)); + if ( dev == NULL ) + { + err = -ENOMEM; + goto fail; + } + + np = dev->priv; + np->state = STATE_CLOSED; + np->net_ring_fixmap_idx = fixmap_idx; + np->idx = i; + + SET_MODULE_OWNER(dev); + dev->open = network_open; + dev->hard_start_xmit = network_start_xmit; + dev->stop = network_close; + dev->get_stats = network_get_stats; + + memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN); + + if ( (err = register_netdev(dev)) != 0 ) + { + kfree(dev); + goto fail; + } + + np->dev = dev; + list_add(&np->list, &dev_list); + } + + return 0; + + fail: + cleanup_module(); + return err; +} + + +static void cleanup_module(void) +{ + struct net_private *np; + struct net_device *dev; + + while ( !list_empty(&dev_list) ) + { + np = list_entry(dev_list.next, struct net_private, list); + list_del(&np->list); + dev = np->dev; + unregister_netdev(dev); + kfree(dev); + } + + if ( start_info.flags & SIF_INITDOMAIN ) + (void)unregister_inetaddr_notifier(¬ifier_inetdev); +} + + +module_init(init_module); +module_exit(cleanup_module); |