diff options
author | kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk> | 2003-02-24 14:19:58 +0000 |
---|---|---|
committer | kaf24@labyrinth.cl.cam.ac.uk <kaf24@labyrinth.cl.cam.ac.uk> | 2003-02-24 14:19:58 +0000 |
commit | 0c876caf68d82f2ccc626a6e5b2e66c785783e42 (patch) | |
tree | d79e18e7282ac90b4248b5156415565503cfcb8f /xen-2.4.16 | |
parent | 0dc9ba5c3a168b9ae1573a97bd20fe1388f3c83d (diff) | |
download | xen-0c876caf68d82f2ccc626a6e5b2e66c785783e42.tar.gz xen-0c876caf68d82f2ccc626a6e5b2e66c785783e42.tar.bz2 xen-0c876caf68d82f2ccc626a6e5b2e66c785783e42.zip |
bitkeeper revision 1.90 (3e5a2a0eLNdNDmT58mo-G1hnrOmMig)
Many files:
More updates to network tx path. New scheduler. More code ripped out. Nearly finished now.
Diffstat (limited to 'xen-2.4.16')
-rw-r--r-- | xen-2.4.16/common/network.c | 24 | ||||
-rw-r--r-- | xen-2.4.16/common/softirq.c | 6 | ||||
-rw-r--r-- | xen-2.4.16/drivers/block/xen_block.c | 12 | ||||
-rw-r--r-- | xen-2.4.16/include/hypervisor-ifs/network.h | 12 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/interrupt.h | 8 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/netdevice.h | 77 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/skbuff.h | 979 | ||||
-rw-r--r-- | xen-2.4.16/include/xeno/vif.h | 24 | ||||
-rw-r--r-- | xen-2.4.16/net/dev.c | 533 | ||||
-rw-r--r-- | xen-2.4.16/net/devinit.c | 5 | ||||
-rw-r--r-- | xen-2.4.16/net/skbuff.c | 1355 |
11 files changed, 641 insertions, 2394 deletions
diff --git a/xen-2.4.16/common/network.c b/xen-2.4.16/common/network.c index 84b201f94c..f761ca9ba2 100644 --- a/xen-2.4.16/common/network.c +++ b/xen-2.4.16/common/network.c @@ -81,9 +81,9 @@ net_vif_t *create_net_vif(int domain) new_vif->net_ring = new_ring; new_vif->shadow_ring = shadow_ring; - - skb_queue_head_init(&new_vif->skb_list); - new_vif->domain = domain; + new_vif->domain = find_domain_by_id(domain); + + new_vif->list.next = NULL; write_lock(&sys_vif_lock); new_vif->id = sys_vif_count; @@ -114,16 +114,11 @@ fail: void destroy_net_vif(struct task_struct *p) { - struct sk_buff *skb; int i; if ( p->num_net_vifs <= 0 ) return; // nothing to do. i = --p->num_net_vifs; - while ( (skb = skb_dequeue(&p->net_vif_list[i]->skb_list)) != NULL ) - { - kfree_skb(skb); - } write_lock(&sys_vif_lock); sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed @@ -145,17 +140,12 @@ void vif_query(vif_query_t *vq) char buf[128]; int i; - if ( !(dom_task = find_domain_by_id(vq->domain)) ) - { - return; - } + if ( !(dom_task = find_domain_by_id(vq->domain)) ) return; *buf = '\0'; - for (i=0; i < dom_task->num_net_vifs; i++) - { + for ( i = 0; i < dom_task->num_net_vifs; i++ ) sprintf(buf + strlen(buf), "%d\n", dom_task->net_vif_list[i]->id); - } copy_to_user(vq->buf, buf, strlen(buf) + 1); @@ -171,12 +161,12 @@ void print_vif_list() net_vif_t *v; printk("Currently, there are %d VIFs.\n", sys_vif_count); - for (i=0; i<sys_vif_count; i++) + for ( i = 0; i<sys_vif_count; i++ ) { v = sys_vif_list[i]; printk("] VIF Entry %d(%d):\n", i, v->id); printk(" > net_ring*: %p\n", v->net_ring); - printk(" > domain : %u\n", v->domain); + printk(" > domain : %u\n", v->domain->domain); } } diff --git a/xen-2.4.16/common/softirq.c b/xen-2.4.16/common/softirq.c index 35932711ec..b98c47f3ce 100644 --- a/xen-2.4.16/common/softirq.c +++ b/xen-2.4.16/common/softirq.c @@ -166,10 +166,9 @@ static void tasklet_action(struct softirq_action *a) if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); t->func(t->data); - tasklet_unlock(t); - continue; } tasklet_unlock(t); + continue; } local_irq_disable(); @@ -200,10 +199,9 @@ static void tasklet_hi_action(struct softirq_action *a) if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) BUG(); t->func(t->data); - tasklet_unlock(t); - continue; } tasklet_unlock(t); + continue; } local_irq_disable(); diff --git a/xen-2.4.16/drivers/block/xen_block.c b/xen-2.4.16/drivers/block/xen_block.c index bf70737dab..f96ee0577d 100644 --- a/xen-2.4.16/drivers/block/xen_block.c +++ b/xen-2.4.16/drivers/block/xen_block.c @@ -70,18 +70,6 @@ static void remove_from_blkdev_list(struct task_struct *p) spin_unlock_irqrestore(&io_schedule_list_lock, flags); } -static void add_to_blkdev_list(struct task_struct *p) -{ - unsigned long flags; - if ( __on_blkdev_list(p) ) return; - spin_lock_irqsave(&io_schedule_list_lock, flags); - if ( !__on_blkdev_list(p) ) - { - list_add(&p->blkdev_list, &io_schedule_list); - } - spin_unlock_irqrestore(&io_schedule_list_lock, flags); -} - static void add_to_blkdev_list_tail(struct task_struct *p) { unsigned long flags; diff --git a/xen-2.4.16/include/hypervisor-ifs/network.h b/xen-2.4.16/include/hypervisor-ifs/network.h index 28cb94618c..1e4e7e1c53 100644 --- a/xen-2.4.16/include/hypervisor-ifs/network.h +++ b/xen-2.4.16/include/hypervisor-ifs/network.h @@ -15,15 +15,15 @@ #include <linux/types.h> typedef struct tx_entry_st { - unsigned long addr; /* virtual address */ - unsigned long size; /* in bytes */ - int status; /* per descriptor status. */ + unsigned long addr; /* machine address of packet */ + unsigned short size; /* in bytes */ + unsigned short status; /* per descriptor status. */ } tx_entry_t; typedef struct rx_entry_st { - unsigned long addr; /* virtual address */ - unsigned long size; /* in bytes */ - int status; /* per descriptor status. */ + unsigned long addr; /* machine address of PTE to swizzle */ + unsigned short size; /* in bytes */ + unsigned short status; /* per descriptor status. */ } rx_entry_t; #define TX_RING_SIZE 256 diff --git a/xen-2.4.16/include/xeno/interrupt.h b/xen-2.4.16/include/xeno/interrupt.h index b018eb3de7..488809b99a 100644 --- a/xen-2.4.16/include/xeno/interrupt.h +++ b/xen-2.4.16/include/xeno/interrupt.h @@ -172,13 +172,17 @@ static inline void tasklet_disable(struct tasklet_struct *t) static inline void tasklet_enable(struct tasklet_struct *t) { smp_mb__before_atomic_dec(); - atomic_dec(&t->count); + if (atomic_dec_and_test(&t->count) && + test_bit(TASKLET_STATE_SCHED, &t->state)) + __tasklet_schedule(t); } static inline void tasklet_hi_enable(struct tasklet_struct *t) { smp_mb__before_atomic_dec(); - atomic_dec(&t->count); + if (atomic_dec_and_test(&t->count) && + test_bit(TASKLET_STATE_SCHED, &t->state)) + __tasklet_hi_schedule(t); } extern void tasklet_kill(struct tasklet_struct *t); diff --git a/xen-2.4.16/include/xeno/netdevice.h b/xen-2.4.16/include/xeno/netdevice.h index 03169af14b..0d7c4c5606 100644 --- a/xen-2.4.16/include/xeno/netdevice.h +++ b/xen-2.4.16/include/xeno/netdevice.h @@ -38,29 +38,11 @@ #ifdef __KERNEL__ #include <xeno/config.h> -struct divert_blk; struct vlan_group; -#define HAVE_ALLOC_NETDEV /* feature macro: alloc_xxxdev - functions are available. */ - -#define NET_XMIT_SUCCESS 0 -#define NET_XMIT_DROP 1 /* skb dropped */ -#define NET_XMIT_CN 2 /* congestion notification */ -#define NET_XMIT_POLICED 3 /* skb is shot by police */ -#define NET_XMIT_BYPASS 4 /* packet does not leave via dequeue; - (TC use only - dev_queue_xmit - returns this as NET_XMIT_SUCCESS) */ - /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ -#define NET_RX_CN_LOW 2 /* storm alert, just in case */ -#define NET_RX_CN_MOD 3 /* Storm on its way! */ -#define NET_RX_CN_HIGH 4 /* The storm is here */ -#define NET_RX_BAD 5 /* packet dropped due to kernel error */ - -#define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) #endif @@ -182,7 +164,6 @@ enum netdev_state_t __LINK_STATE_XOFF=0, __LINK_STATE_START, __LINK_STATE_PRESENT, - __LINK_STATE_SCHED, __LINK_STATE_NOCARRIER }; @@ -409,7 +390,6 @@ extern struct net_device *dev_alloc(const char *name, int *err); extern int dev_alloc_name(struct net_device *dev, const char *name); extern int dev_open(struct net_device *dev); extern int dev_close(struct net_device *dev); -extern int dev_queue_xmit(struct sk_buff *skb); extern int register_netdevice(struct net_device *dev); extern int unregister_netdevice(struct net_device *dev); extern void dev_shutdown(struct net_device *dev); @@ -430,30 +410,14 @@ static inline int unregister_gifconf(unsigned int family) extern struct tasklet_struct net_tx_tasklet; - -struct softnet_data -{ - struct net_device *output_queue; - struct sk_buff *completion_queue; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); - - -extern struct softnet_data softnet_data[NR_CPUS]; +extern struct list_head net_schedule_list; +extern spinlock_t net_schedule_list_lock; #define HAVE_NETIF_QUEUE static inline void __netif_schedule(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; - int cpu = smp_processor_id(); - - local_irq_save(flags); - dev->next_sched = softnet_data[cpu].output_queue; - softnet_data[cpu].output_queue = dev; - tasklet_schedule(&net_tx_tasklet); - local_irq_restore(flags); - } + tasklet_schedule(&net_tx_tasklet); } static inline void netif_schedule(struct net_device *dev) @@ -488,41 +452,18 @@ static inline int netif_running(struct net_device *dev) return test_bit(__LINK_STATE_START, &dev->state); } -/* Use this variant when it is known for sure that it - * is executing from interrupt context. - */ -static inline void dev_kfree_skb_irq(struct sk_buff *skb) -{ - if (atomic_dec_and_test(&skb->users)) { - int cpu = smp_processor_id(); - unsigned long flags; - - local_irq_save(flags); - skb->next = softnet_data[cpu].completion_queue; - softnet_data[cpu].completion_queue = skb; - tasklet_schedule(&net_tx_tasklet); - local_irq_restore(flags); - } -} -/* Use this variant in places where it could be invoked - * either from interrupt or non-interrupt context. +/* + * Xen does not need deferred skb freeing, as all destructor hook functions + * are IRQ safe. Linux needed more care for some destructors... */ -static inline void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} +#define dev_kfree_skb_irq(_skb) dev_kfree_skb(_skb) +#define dev_kfree_skb_any(_skb) dev_kfree_skb(_skb) extern void net_call_rx_atomic(void (*fn)(void)); -#define HAVE_NETIF_RX 1 extern int netif_rx(struct sk_buff *skb); extern int dev_ioctl(unsigned int cmd, void *); extern int dev_change_flags(struct net_device *, unsigned); -extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); - extern void dev_init(void); extern int netdev_nit; @@ -654,9 +595,7 @@ extern void dev_load(const char *name); extern void dev_mcast_init(void); extern int netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev)); extern void netdev_unregister_fc(int bit); -extern int netdev_max_backlog; extern unsigned long netdev_fc_xoff; -extern atomic_t netdev_dropping; extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern struct sk_buff * skb_checksum_help(struct sk_buff *skb); diff --git a/xen-2.4.16/include/xeno/skbuff.h b/xen-2.4.16/include/xeno/skbuff.h index 5be56a7cc7..519328a679 100644 --- a/xen-2.4.16/include/xeno/skbuff.h +++ b/xen-2.4.16/include/xeno/skbuff.h @@ -19,21 +19,23 @@ #include <linux/time.h> #include <linux/timer.h> #include <linux/cache.h> - +#include <linux/slab.h> +#include <asm/system.h> #include <asm/atomic.h> #include <asm/types.h> #include <linux/spinlock.h> #include <linux/mm.h> -// vif special values. +/* vif special values */ #define VIF_PHYSICAL_INTERFACE -1 #define VIF_UNKNOWN_INTERFACE -2 #define VIF_DROP -3 #define VIF_ANY_INTERFACE -4 -//skb_type values: -#define SKB_NORMAL 0 -#define SKB_ZERO_COPY 1 +/* skb_type values */ +#define SKB_NORMAL 0 /* A Linux-style skbuff: no strangeness */ +#define SKB_ZERO_COPY 1 /* Zero copy skbs are used for receive */ +#define SKB_NODATA 2 /* Data allocation not handled by us */ #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ @@ -44,9 +46,6 @@ #define CHECKSUM_UNNECESSARY 2 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) -#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) -#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) -#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) /* A. Checksumming of received packets by device. * @@ -82,8 +81,6 @@ * TCP/UDP over IPv4. Sigh. Vendors like this * way by an unknown reason. Though, see comment above * about CHECKSUM_UNNECESSARY. 8) - * - * Any questions? No questions, good. --ANK */ #ifdef __i386__ @@ -92,171 +89,98 @@ #define NET_CALLER(arg) __builtin_return_address(0) #endif -#ifdef CONFIG_NETFILTER -struct nf_conntrack { - atomic_t use; - void (*destroy)(struct nf_conntrack *); -}; - -struct nf_ct_info { - struct nf_conntrack *master; -}; -#endif - struct sk_buff_head { - /* These two members must be first. */ - struct sk_buff * next; - struct sk_buff * prev; + /* These two members must be first. */ + struct sk_buff * next; + struct sk_buff * prev; - __u32 qlen; - spinlock_t lock; + __u32 qlen; + spinlock_t lock; }; -struct sk_buff; - -#define MAX_SKB_FRAGS 6 +#define MAX_SKB_FRAGS 1 /* KAF: was 6 */ -typedef struct skb_frag_struct skb_frag_t; +typedef struct skb_frag_struct { + struct pfn_info *page; + __u16 page_offset; + __u16 size; +} skb_frag_t; -struct skb_frag_struct -{ - struct pfn_info *page; - __u16 page_offset; - __u16 size; -}; - -/* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ struct skb_shared_info { - atomic_t dataref; - unsigned int nr_frags; - struct sk_buff *frag_list; - skb_frag_t frags[MAX_SKB_FRAGS]; + unsigned int nr_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; }; struct sk_buff { - /* These two members must be first. */ - struct sk_buff * next; /* Next buffer in list */ - struct sk_buff * prev; /* Previous buffer in list */ - - struct sk_buff_head * list; /* List we are on */ - struct net_device *dev; /* Device we arrived on/are leaving by */ - - /* Transport layer header */ - union - { - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *icmph; - struct igmphdr *igmph; - struct iphdr *ipiph; - struct spxhdr *spxh; - unsigned char *raw; - } h; - - /* Network layer header */ - union - { - struct iphdr *iph; - struct ipv6hdr *ipv6h; - struct arphdr *arph; - struct ipxhdr *ipxh; - unsigned char *raw; - } nh; + /* These two members must be first. */ + struct sk_buff * next; /* Next buffer in list */ + struct sk_buff * prev; /* Previous buffer in list */ + + struct sk_buff_head * list; /* List we are on */ + struct net_device *dev; /* Device we arrived on/are leaving by */ + + /* Transport layer header */ + union + { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + struct spxhdr *spxh; + unsigned char *raw; + } h; + + /* Network layer header */ + union + { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arphdr *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; - /* Link layer header */ - union - { - struct ethhdr *ethernet; - unsigned char *raw; - } mac; - - unsigned int len; /* Length of actual data */ - unsigned int data_len; - unsigned int csum; /* Checksum */ - unsigned char skb_type, - cloned, /* head may be cloned (check refcnt to be sure) */ - pkt_type, /* Packet class */ - ip_summed; /* Driver fed us an IP checksum */ - atomic_t users; /* User count - see datagram.c,tcp.c */ - unsigned short protocol; /* Packet protocol from driver. */ - unsigned char *head; /* Head of buffer */ - unsigned char *data; /* Data head pointer */ - unsigned char *tail; /* Tail pointer */ - unsigned char *end; /* End pointer */ - - void (*destructor)(struct sk_buff *); /* Destruct function */ - struct pfn_info *pf; /* record of physical pf address for freeing */ - int src_vif; /* vif we came from */ - int dst_vif; /* vif we are bound for */ - struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. */ + /* Link layer header */ + union + { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + + unsigned int len; /* Length of actual data */ + unsigned int data_len; + unsigned int csum; /* Checksum */ + unsigned char skb_type, + pkt_type, /* Packet class */ + ip_summed; /* Driver fed us an IP checksum */ + unsigned short protocol; /* Packet protocol from driver. */ + unsigned char *head; /* Head of buffer */ + unsigned char *data; /* Data head pointer */ + unsigned char *tail; /* Tail pointer */ + unsigned char *end; /* End pointer */ + + void (*destructor)(struct sk_buff *); /* Destruct function */ + struct pfn_info *pf; /* record of physical pf address for freeing */ + int src_vif; /* vif we came from */ + int dst_vif; /* vif we are bound for */ + struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. */ }; -#define SK_WMEM_MAX 65535 -#define SK_RMEM_MAX 65535 - -#ifdef __KERNEL__ -/* - * Handling routines are only of interest to the kernel - */ -#include <linux/slab.h> - -#include <asm/system.h> - -extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff * alloc_skb(unsigned int size, int priority); -extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); -extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); -extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); -extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); -extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); -extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); -extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, - int newheadroom, - int newtailroom, - int priority); +extern void __kfree_skb(struct sk_buff *skb); +extern struct sk_buff *alloc_skb(unsigned int size, int priority); +extern struct sk_buff *alloc_skb_nodata(int priority); +extern struct sk_buff *alloc_zc_skb(unsigned int size, int priority); +extern void kfree_skbmem(struct sk_buff *skb); +extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, void *here); extern void skb_under_panic(struct sk_buff *skb, int len, void *here); -/* Internal */ -//#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) +/* In Xen, we don't clone skbs, so shared data can go in the sk_buff struct. */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(&(SKB)->shinfo)) /** - * skb_queue_empty - check if a queue is empty - * @list: queue head - * - * Returns true if the queue is empty, false otherwise. - */ - -static inline int skb_queue_empty(struct sk_buff_head *list) -{ - return (list->next == (struct sk_buff *) list); -} - -/** - * skb_get - reference buffer - * @skb: buffer to reference - * - * Makes another reference to a socket buffer and returns a pointer - * to the buffer. - */ - -static inline struct sk_buff *skb_get(struct sk_buff *skb) -{ - atomic_inc(&skb->users); - return skb; -} - -/* - * If users==1, we are the only owner and are can avoid redundant - * atomic change. - */ - -/** * kfree_skb - free an sk_buff * @skb: buffer to free * @@ -266,143 +190,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb) static inline void kfree_skb(struct sk_buff *skb) { - if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) - __kfree_skb(skb); -} - -/* Use this if you didn't touch the skb state [for fast switching] */ -static inline void kfree_skb_fast(struct sk_buff *skb) -{ - if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) - kfree_skbmem(skb); -} - -/** - * skb_cloned - is the buffer a clone - * @skb: buffer to check - * - * Returns true if the buffer was generated with skb_clone() and is - * one of multiple shared copies of the buffer. Cloned buffers are - * shared data so must not be written to under normal circumstances. - */ - -static inline int skb_cloned(struct sk_buff *skb) -{ - return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; -} - -/** - * skb_shared - is the buffer shared - * @skb: buffer to check - * - * Returns true if more than one person has a reference to this - * buffer. - */ - -static inline int skb_shared(struct sk_buff *skb) -{ - return (atomic_read(&skb->users) != 1); -} - -/** - * skb_share_check - check if buffer is shared and if so clone it - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the buffer is shared the buffer is cloned and the old copy - * drops a reference. A new clone with a single reference is returned. - * If the buffer is not shared the original buffer is returned. When - * being called from interrupt status or with spinlocks held pri must - * be GFP_ATOMIC. - * - * NULL is returned on a memory allocation failure. - */ - -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) -{ - if (skb_shared(skb)) { - struct sk_buff *nskb; - nskb = skb_clone(skb, pri); - kfree_skb(skb); - return nskb; - } - return skb; -} - - -/* - * Copy shared buffers into a new sk_buff. We effectively do COW on - * packets to handle cases where we have a local reader and forward - * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. - */ - -/** - * skb_unshare - make a copy of a shared buffer - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the socket buffer is a clone then this function creates a new - * copy of the data, drops a reference count on the old copy and returns - * the new copy with the reference count at 1. If the buffer is not a clone - * the original buffer is returned. When called with a spinlock held or - * from interrupt state @pri must be %GFP_ATOMIC - * - * %NULL is returned on a memory allocation failure. - */ - -static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) -{ - struct sk_buff *nskb; - if(!skb_cloned(skb)) - return skb; - nskb=skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ - return nskb; -} - -/** - * skb_peek - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the head element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ - -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->next; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_peek_tail - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the tail element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ - -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->prev; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; + __kfree_skb(skb); } /** @@ -414,24 +202,17 @@ static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) static inline __u32 skb_queue_len(struct sk_buff_head *list_) { - return(list_->qlen); + return(list_->qlen); } static inline void skb_queue_head_init(struct sk_buff_head *list) { - spin_lock_init(&list->lock); - list->prev = (struct sk_buff *)list; - list->next = (struct sk_buff *)list; - list->qlen = 0; + spin_lock_init(&list->lock); + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; + list->qlen = 0; } -/* - * Insert an sk_buff at the start of a list. - * - * The "__skb_xxxx()" functions are the non-atomic ones that - * can only be called with interrupts disabled. - */ - /** * __skb_queue_head - queue a buffer at the list head * @list: list to use @@ -445,85 +226,16 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) { - struct sk_buff *prev, *next; + struct sk_buff *prev, *next; - newsk->list = list; - list->qlen++; - prev = (struct sk_buff *)list; - next = prev->next; - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; -} - - -/** - * skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_head(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * __skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the end of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ - - -static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - next = (struct sk_buff *)list; - prev = next->prev; - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; -} - -/** - * skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the tail of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_tail(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); + newsk->list = list; + list->qlen++; + prev = (struct sk_buff *)list; + next = prev->next; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; } /** @@ -537,205 +249,29 @@ static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *new static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) { - struct sk_buff *next, *prev, *result; - - prev = (struct sk_buff *) list; - next = prev->next; - result = NULL; - if (next != prev) { - result = next; - next = next->next; - list->qlen--; - next->prev = prev; - prev->next = next; - result->next = NULL; - result->prev = NULL; - result->list = NULL; - } - return result; -} - -/** - * skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The head item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) -{ - long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/* - * Insert a packet on a list. - */ - -static inline void __skb_insert(struct sk_buff *newsk, - struct sk_buff * prev, struct sk_buff *next, - struct sk_buff_head * list) -{ - newsk->next = next; - newsk->prev = prev; - next->prev = newsk; - prev->next = newsk; - newsk->list = list; - list->qlen++; -} - -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * A buffer cannot be placed on two lists at the same time. - */ - -static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); -} + struct sk_buff *next, *prev, *result; -/* - * Place a packet after a given packet in a list. - */ - -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - __skb_insert(newsk, old, old->next, old->list); -} - -/** - * skb_append - append a buffer - * @old: buffer to insert after - * @newsk: buffer to insert - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls. - * A buffer cannot be placed on two lists at the same time. - */ - - -static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); -} - -/* - * remove sk_buff from list. _Must_ be called atomically, and with - * the list known.. - */ - -static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - struct sk_buff * next, * prev; - - list->qlen--; - next = skb->next; - prev = skb->prev; - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - next->prev = prev; - prev->next = next; -} - -/** - * skb_unlink - remove a buffer from a list - * @skb: buffer to remove - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. - */ - -static inline void skb_unlink(struct sk_buff *skb) -{ - struct sk_buff_head *list = skb->list; - - if(list) { - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - if(skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } -} - -/* XXX: more streamlined implementation */ - -/** - * __skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. This function does not take any locks - * so must be used with appropriate locks held only. The tail item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek_tail(list); - if (skb) - __skb_unlink(skb, list); - return skb; -} - -/** - * skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The tail item is - * returned or %NULL if the list is empty. - */ - -static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -{ - long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue_tail(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; + prev = (struct sk_buff *) list; + next = prev->next; + result = NULL; + if (next != prev) { + result = next; + next = next->next; + list->qlen--; + next->prev = prev; + prev->next = next; + result->next = NULL; + result->prev = NULL; + result->list = NULL; + } + return result; } static inline int skb_is_nonlinear(const struct sk_buff *skb) { - return skb->data_len; -} - -static inline int skb_headlen(const struct sk_buff *skb) -{ - return skb->len - skb->data_len; + return skb->data_len; } -#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0) -#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0) #define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0) /* @@ -744,11 +280,11 @@ static inline int skb_headlen(const struct sk_buff *skb) static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp=skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail+=len; - skb->len+=len; - return tmp; + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + return tmp; } /** @@ -763,21 +299,21 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) { - unsigned char *tmp=skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail+=len; - skb->len+=len; - if(skb->tail>skb->end) { - skb_over_panic(skb, len, current_text_addr()); - } - return tmp; + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + if(skb->tail>skb->end) { + skb_over_panic(skb, len, current_text_addr()); + } + return tmp; } static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) { - skb->data-=len; - skb->len+=len; - return skb->data; + skb->data-=len; + skb->len+=len; + return skb->data; } /** @@ -792,20 +328,20 @@ static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) { - skb->data-=len; - skb->len+=len; - if(skb->data<skb->head) { - skb_under_panic(skb, len, current_text_addr()); - } - return skb->data; + skb->data-=len; + skb->len+=len; + if(skb->data<skb->head) { + skb_under_panic(skb, len, current_text_addr()); + } + return skb->data; } static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) { - skb->len-=len; - if (skb->len < skb->data_len) - BUG(); - return skb->data+=len; + skb->len-=len; + if (skb->len < skb->data_len) + BUG(); + return skb->data+=len; } /** @@ -821,60 +357,9 @@ static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) { - if (len > skb->len) - return NULL; - return __skb_pull(skb,len); -} - -extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); - -static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb_headlen(skb) && - __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) - return NULL; - skb->len -= len; - return skb->data += len; -} - -static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb->len) - return NULL; - return __pskb_pull(skb,len); -} - -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) -{ - if (len <= skb_headlen(skb)) - return 1; - if (len > skb->len) - return 0; - return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); -} - -/** - * skb_headroom - bytes at buffer head - * @skb: buffer to check - * - * Return the number of bytes of free space at the head of an &sk_buff. - */ - -static inline int skb_headroom(const struct sk_buff *skb) -{ - return skb->data-skb->head; -} - -/** - * skb_tailroom - bytes at buffer end - * @skb: buffer to check - * - * Return the number of bytes of free space at the tail of an sk_buff - */ - -static inline int skb_tailroom(const struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; + if (len > skb->len) + return NULL; + return __skb_pull(skb,len); } /** @@ -888,106 +373,8 @@ static inline int skb_tailroom(const struct sk_buff *skb) static inline void skb_reserve(struct sk_buff *skb, unsigned int len) { - skb->data+=len; - skb->tail+=len; -} - -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); - -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - } else { - ___pskb_trim(skb, len, 0); - } -} - -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - */ - -static inline void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) { - __skb_trim(skb, len); - } -} - - -static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - return 0; - } else { - return ___pskb_trim(skb, len, 1); - } -} - -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (len < skb->len) - return __pskb_trim(skb, len); - return 0; -} - -/** - * skb_orphan - orphan a buffer - * @skb: buffer to orphan - * - * If a buffer currently has an owner then we call the owner's - * destructor function and make the @skb unowned. The buffer continues - * to exist but is no longer charged to its former owner. - */ - - -static inline void skb_orphan(struct sk_buff *skb) -{ - if (skb->destructor) - skb->destructor(skb); - skb->destructor = NULL; -} - -/** - * skb_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function takes the list - * lock and is atomic with respect to other list locking functions. - */ - - -static inline void skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb=skb_dequeue(list))!=NULL) - kfree_skb(skb); -} - -/** - * __skb_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function does not take the - * list lock and the caller must hold the relevant locks to use it. - */ - - -static inline void __skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb=__skb_dequeue(list))!=NULL) - kfree_skb(skb); + skb->data+=len; + skb->tail+=len; } /** @@ -1006,13 +393,11 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) static inline struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask) { - struct sk_buff *skb; - - //skb = alloc_skb(length+16, gfp_mask); - skb = alloc_zc_skb(length+16, gfp_mask); - if (skb) - skb_reserve(skb,16); - return skb; + struct sk_buff *skb; + skb = alloc_zc_skb(length+16, gfp_mask); + if (skb) + skb_reserve(skb,16); + return skb; } /** @@ -1030,84 +415,20 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, static inline struct sk_buff *dev_alloc_skb(unsigned int length) { - return __dev_alloc_skb(length, GFP_ATOMIC); -} - -/** - * skb_cow - copy header of skb when it is required - * @skb: buffer to cow - * @headroom: needed headroom - * - * If the skb passed lacks sufficient headroom or its data part - * is shared, data is reallocated. If reallocation fails, an error - * is returned and original skb is not changed. - * - * The result is skb with writable area skb->head...skb->tail - * and at least @headroom of space at head. - */ - -static inline int -skb_cow(struct sk_buff *skb, unsigned int headroom) -{ - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); - - if (delta < 0) - delta = 0; - - if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); - return 0; + return __dev_alloc_skb(length, GFP_ATOMIC); } -/** - * skb_linearize - convert paged skb to linear one - * @skb: buffer to linarize - * @gfp: allocation mode - * - * If there is no free memory -ENOMEM is returned, otherwise zero - * is returned and the old skb data released. */ -int skb_linearize(struct sk_buff *skb, int gfp); - static inline void *kmap_skb_frag(const skb_frag_t *frag) { - return page_address(frag->page); + return page_address(frag->page); } static inline void kunmap_skb_frag(void *vaddr) { } -#define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next; \ - (skb != (struct sk_buff *)(queue)); \ - skb=skb->next) - - -extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); -extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); - -extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); -extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); - +extern int skb_copy_bits(const struct sk_buff *skb, + int offset, void *to, int len); extern void skb_init(void); -extern void skb_add_mtu(int mtu); - -#ifdef CONFIG_NETFILTER -static inline void -nf_conntrack_put(struct nf_ct_info *nfct) -{ - if (nfct && atomic_dec_and_test(&nfct->master->use)) - nfct->master->destroy(nfct->master); -} -static inline void -nf_conntrack_get(struct nf_ct_info *nfct) -{ - if (nfct) - atomic_inc(&nfct->master->use); -} -#endif -#endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/xen-2.4.16/include/xeno/vif.h b/xen-2.4.16/include/xeno/vif.h index 9bf5e7686e..46cfbd4bb8 100644 --- a/xen-2.4.16/include/xeno/vif.h +++ b/xen-2.4.16/include/xeno/vif.h @@ -26,16 +26,17 @@ */ typedef struct rx_shadow_entry_st { - unsigned long addr; - unsigned long size; - int status; - unsigned long flush_count; + unsigned long addr; + unsigned short size; + unsigned short status; + unsigned long flush_count; } rx_shadow_entry_t; typedef struct tx_shadow_entry_st { - unsigned long addr; - unsigned long size; - int status; + void *header; + unsigned long payload; + unsigned short size; + unsigned short status; } tx_shadow_entry_t; typedef struct net_shadow_ring_st { @@ -46,12 +47,11 @@ typedef struct net_shadow_ring_st { } net_shadow_ring_t; typedef struct net_vif_st { - net_ring_t *net_ring; - net_shadow_ring_t *shadow_ring; + net_ring_t *net_ring; + net_shadow_ring_t *shadow_ring; int id; - struct sk_buff_head skb_list; - unsigned int domain; - // rules table goes here in next revision. + struct task_struct *domain; + struct list_head list; } net_vif_t; /* VIF-related defines. */ diff --git a/xen-2.4.16/net/dev.c b/xen-2.4.16/net/dev.c index a01ad51fa3..df637ca688 100644 --- a/xen-2.4.16/net/dev.c +++ b/xen-2.4.16/net/dev.c @@ -37,7 +37,6 @@ #define rtmsg_ifinfo(_a,_b,_c) ((void)0) #define rtnl_lock() ((void)0) #define rtnl_unlock() ((void)0) -#define dst_init() ((void)0) #if 0 #define DPRINTK(_f, _a...) printk(_f , ## _a) @@ -53,11 +52,12 @@ struct net_device *the_dev = NULL; /* - * Device drivers call our routines to queue packets here. We empty the - * queue in the local softnet handler. + * Transmitted packets are fragmented, so we can copy the important headesr + * before checking them for validity. Avoids need for page protection. */ -struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; - +/* Ethernet + IP headers */ +#define PKT_PROT_LEN (ETH_HLEN + 20) +static kmem_cache_t *net_header_cachep; /** * __dev_get_by_name - find a device by its name @@ -105,14 +105,6 @@ struct net_device *dev_get_by_name(const char *name) return dev; } -/* - Return value is changed to int to prevent illegal usage in future. - It is still legal to use to check for device existance. - - User should understand, that the result returned by this function - is meaningless, if it was not issued under rtnl semaphore. - */ - /** * dev_get - test if a device exists * @name: name to test for @@ -483,142 +475,13 @@ illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -/* - * dev_queue_xmit - transmit a buffer - * @skb: buffer to transmit - * - * Queue a buffer for transmission to a network device. The caller must - * have set the device and priority and built the buffer before calling this - * function. The function can be called from an interrupt. - * - * A negative errno code is returned on a failure. A success does not - * guarantee the frame will be transmitted as it may be dropped due - * to congestion or traffic shaping. - */ - -int dev_queue_xmit(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - - if (!(dev->features&NETIF_F_SG)) - { - printk("NIC doesn't do scatter-gather!\n"); - BUG(); - } - - if (skb_shinfo(skb)->frag_list && - !(dev->features&NETIF_F_FRAGLIST) && - skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return -ENOMEM; - } - - spin_lock_bh(&dev->queue_lock); - if (dev->flags&IFF_UP) { - int cpu = smp_processor_id(); - - if (dev->xmit_lock_owner != cpu) { - spin_unlock(&dev->queue_lock); - spin_lock(&dev->xmit_lock); - dev->xmit_lock_owner = cpu; - - if (!netif_queue_stopped(dev)) { - if (dev->hard_start_xmit(skb, dev) == 0) { - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - return 0; - } - } - dev->xmit_lock_owner = -1; - spin_unlock_bh(&dev->xmit_lock); - kfree_skb(skb); - return -ENETDOWN; - } - } - spin_unlock_bh(&dev->queue_lock); - - kfree_skb(skb); - return -ENETDOWN; -} - /*======================================================================= Receiver routines =======================================================================*/ -int netdev_max_backlog = 300; -/* These numbers are selected based on intuition and some - * experimentatiom, if you have more scientific way of doing this - * please go ahead and fix things. - */ -int no_cong_thresh = 10; -int no_cong = 20; -int lo_cong = 100; -int mod_cong = 290; - struct netif_rx_stats netdev_rx_stat[NR_CPUS]; - -#ifdef CONFIG_NET_HW_FLOWCONTROL -atomic_t netdev_dropping = ATOMIC_INIT(0); -static unsigned long netdev_fc_mask = 1; -unsigned long netdev_fc_xoff = 0; -spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED; - -static struct -{ - void (*stimul)(struct net_device *); - struct net_device *dev; -} netdev_fc_slots[BITS_PER_LONG]; - -int netdev_register_fc(struct net_device *dev, - void (*stimul)(struct net_device *dev)) -{ - int bit = 0; - unsigned long flags; - - spin_lock_irqsave(&netdev_fc_lock, flags); - if (netdev_fc_mask != ~0UL) { - bit = ffz(netdev_fc_mask); - netdev_fc_slots[bit].stimul = stimul; - netdev_fc_slots[bit].dev = dev; - set_bit(bit, &netdev_fc_mask); - clear_bit(bit, &netdev_fc_xoff); - } - spin_unlock_irqrestore(&netdev_fc_lock, flags); - return bit; -} - -void netdev_unregister_fc(int bit) -{ - unsigned long flags; - - spin_lock_irqsave(&netdev_fc_lock, flags); - if (bit > 0) { - netdev_fc_slots[bit].stimul = NULL; - netdev_fc_slots[bit].dev = NULL; - clear_bit(bit, &netdev_fc_mask); - clear_bit(bit, &netdev_fc_xoff); - } - spin_unlock_irqrestore(&netdev_fc_lock, flags); -} - -static void netdev_wakeup(void) -{ - unsigned long xoff; - - spin_lock(&netdev_fc_lock); - xoff = netdev_fc_xoff; - netdev_fc_xoff = 0; - while (xoff) { - int i = ffz(~xoff); - xoff &= ~(1<<i); - netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev); - } - spin_unlock(&netdev_fc_lock); -} -#endif - void deliver_packet(struct sk_buff *skb, net_vif_t *vif) { net_shadow_ring_t *shadow_ring; @@ -677,9 +540,6 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) shadow_ring->rx_cons = RX_RING_INC(i); } -/* Deliver skb to an old protocol, which is not threaded well - or which do not understand shared skbs. - */ /** * netif_rx - post buffer to the network code * @skb: buffer to post @@ -691,12 +551,7 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif) * * return values: * NET_RX_SUCCESS (no congestion) - * NET_RX_CN_LOW (low congestion) - * NET_RX_CN_MOD (moderate congestion) - * NET_RX_CN_HIGH (high congestion) * NET_RX_DROP (packet was dropped) - * - * */ int netif_rx(struct sk_buff *skb) @@ -707,7 +562,6 @@ int netif_rx(struct sk_buff *skb) struct task_struct *p; int this_cpu = smp_processor_id(); - struct softnet_data *queue; unsigned long flags; net_vif_t *vif; @@ -733,8 +587,6 @@ int netif_rx(struct sk_buff *skb) skb->data += ETH_HLEN; skb->nh.raw = skb->data; - queue = &softnet_data[this_cpu]; - netdev_rx_stat[this_cpu].total++; if ( skb->src_vif == VIF_UNKNOWN_INTERFACE ) @@ -762,8 +614,7 @@ int netif_rx(struct sk_buff *skb) read_lock(&tasklist_lock); p = &idle0_task; do { - if ( p->domain != vif->domain ) continue; - if ( vif->skb_list.qlen > 100 ) break; + if ( p != vif->domain ) continue; deliver_packet(skb, vif); cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX); read_unlock(&tasklist_lock); @@ -787,124 +638,146 @@ int netif_rx(struct sk_buff *skb) kfree_skb(skb); hyp_event_notify(cpu_mask); local_irq_restore(flags); - return 0; + return NET_RX_SUCCESS; } -static int deliver_to_old_ones(struct packet_type *pt, - struct sk_buff *skb, int last) -{ - static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; - int ret = NET_RX_DROP; +/************************************************************* + * NEW TRANSMIT SCHEDULER + */ +struct list_head net_schedule_list; +spinlock_t net_schedule_list_lock; - if (!last) { - skb = skb_clone(skb, GFP_ATOMIC); - if (skb == NULL) - return ret; +static int __on_net_schedule_list(net_vif_t *vif) +{ + return vif->list.next != NULL; +} + +static void remove_from_net_schedule_list(net_vif_t *vif) +{ + unsigned long flags; + if ( !__on_net_schedule_list(vif) ) return; + spin_lock_irqsave(&net_schedule_list_lock, flags); + if ( __on_net_schedule_list(vif) ) + { + list_del(&vif->list); + vif->list.next = NULL; } - if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) { - kfree_skb(skb); - return ret; + spin_unlock_irqrestore(&net_schedule_list_lock, flags); +} + +static void add_to_net_schedule_list_tail(net_vif_t *vif) +{ + unsigned long flags; + if ( __on_net_schedule_list(vif) ) return; + spin_lock_irqsave(&net_schedule_list_lock, flags); + if ( !__on_net_schedule_list(vif) ) + { + list_add_tail(&vif->list, &net_schedule_list); } + spin_unlock_irqrestore(&net_schedule_list_lock, flags); +} - /* The assumption (correct one) is that old protocols - did not depened on BHs different of NET_BH and TIMER_BH. - */ - /* Emulate NET_BH with special spinlock */ - spin_lock(&net_bh_lock); +/* Destructor function for tx skbs. */ +static void tx_skb_release(struct sk_buff *skb) +{ + int i; + net_ring_t *ring; + + for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ ) + put_page_tot(skb_shinfo(skb)->frags[i].page); - /* Disable timers and wait for all timers completion */ - tasklet_disable(bh_task_vec+TIMER_BH); + if ( skb->skb_type == SKB_NODATA ) + kmem_cache_free(net_header_cachep, skb->head); - ret = pt->func(skb, skb->dev, pt); + skb_shinfo(skb)->nr_frags = 0; - tasklet_hi_enable(bh_task_vec+TIMER_BH); - spin_unlock(&net_bh_lock); - return ret; + /* + * XXX This assumes that, per vif, SKBs are processed in-order! + * Also assumes no concurrency. This is safe because each vif + * maps to one NIC. This is executed in NIC interrupt code, so we have + * mutual exclusion from do_IRQ(). + */ + ring = sys_vif_list[skb->src_vif]->net_ring; + ring->tx_cons = TX_RING_INC(ring->tx_cons); + + if ( ring->tx_cons == ring->tx_event ) + set_bit(_EVENT_NET_TX, + &sys_vif_list[skb->src_vif]->domain->shared_info->events); } + static void net_tx_action(unsigned long unused) { - int cpu = smp_processor_id(); - - if (softnet_data[cpu].completion_queue) { - struct sk_buff *clist; - - local_irq_disable(); - clist = softnet_data[cpu].completion_queue; - softnet_data[cpu].completion_queue = NULL; - local_irq_enable(); - - while (clist != NULL) { - struct sk_buff *skb = clist; - clist = clist->next; + struct net_device *dev = the_dev; + struct list_head *ent; + struct sk_buff *skb; + net_vif_t *vif; + tx_shadow_entry_t *tx; + int pending_bytes = 0, pending_bytes_max = 1; - BUG_TRAP(atomic_read(&skb->users) == 0); - __kfree_skb(skb); + spin_lock(&dev->xmit_lock); + while ( !netif_queue_stopped(dev) && + (pending_bytes < pending_bytes_max) && + !list_empty(&net_schedule_list) ) + { + /* Get a vif from the list with work to do. */ + ent = net_schedule_list.next; + vif = list_entry(ent, net_vif_t, list); + remove_from_net_schedule_list(vif); + if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod ) + continue; + + /* Check the chosen entry is good. */ + tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx]; + if ( tx->status != RING_STATUS_OK ) goto skip_desc; + + if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL ) + { + add_to_net_schedule_list_tail(vif); + printk("Out of memory in net_tx_action()!\n"); + goto out; } - } - - if (softnet_data[cpu].output_queue) { - struct net_device *head; - - local_irq_disable(); - head = softnet_data[cpu].output_queue; - softnet_data[cpu].output_queue = NULL; - local_irq_enable(); - - while (head != NULL) { - struct net_device *dev = head; - head = head->next_sched; + + skb->destructor = tx_skb_release; + + skb->head = skb->data = tx->header; + skb->end = skb->tail = skb->head + PKT_PROT_LEN; + + skb->dev = the_dev; + skb->src_vif = vif->id; + skb->dst_vif = VIF_PHYSICAL_INTERFACE; + skb->mac.raw = skb->data; + + skb_shinfo(skb)->frags[0].page = frame_table + + (tx->payload >> PAGE_SHIFT); + skb_shinfo(skb)->frags[0].size = tx->size - PKT_PROT_LEN; + skb_shinfo(skb)->frags[0].page_offset = tx->payload & ~PAGE_MASK; + skb_shinfo(skb)->nr_frags = 1; - smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_SCHED, &dev->state); + skb->data_len = tx->size - PKT_PROT_LEN; + skb->len = tx->size; - if (spin_trylock(&dev->queue_lock)) { - /*qdisc_run(dev); XXX KAF */ - spin_unlock(&dev->queue_lock); - } else { - netif_schedule(dev); - } + /* Transmit should always work, or the queue would be stopped. */ + if ( dev->hard_start_xmit(skb, dev) != 0 ) + { + add_to_net_schedule_list_tail(vif); + printk("Weird failure in hard_start_xmit!\n"); + goto out; } - } -} -DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) -void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL; -#endif - -static __inline__ int handle_bridge(struct sk_buff *skb, - struct packet_type *pt_prev) -{ - int ret = NET_RX_DROP; - - if (pt_prev) { - if (!pt_prev->data) - ret = deliver_to_old_ones(pt_prev, skb, 0); - else { - atomic_inc(&skb->users); - ret = pt_prev->func(skb, skb->dev, pt_prev); - } + skip_desc: + vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx); + if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod ) + add_to_net_schedule_list_tail(vif); } - -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) - br_handle_frame_hook(skb); -#endif - return ret; + out: + spin_unlock(&dev->xmit_lock); } - -#ifdef CONFIG_NET_DIVERT -static inline void handle_diverter(struct sk_buff *skb) -{ - /* if diversion is supported on device, then divert */ - if (skb->dev->divert && skb->dev->divert->divert) - divert_frame(skb); -} -#endif /* CONFIG_NET_DIVERT */ +DECLARE_TASKLET_DISABLED(net_tx_tasklet, net_tx_action, 0); /* @@ -1809,22 +1682,20 @@ extern void dv_init(void); int __init net_dev_init(void) { struct net_device *dev, **dp; - int i; if ( !dev_boot_phase ) return 0; - /* KAF: was sone in socket_init, but that top-half stuff is gone. */ skb_init(); - /* Initialise the packet receive queues. */ - for ( i = 0; i < NR_CPUS; i++ ) - { - struct softnet_data *queue; - queue = &softnet_data[i]; - queue->completion_queue = NULL; - } - + net_header_cachep = kmem_cache_create( + "net_header_cache", + (PKT_PROT_LEN + sizeof(void *) - 1) & ~(sizeof(void *) - 1), + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + spin_lock_init(&net_schedule_list_lock); + INIT_LIST_HEAD(&net_schedule_list); + /* * Add the devices. * If the call to dev->init fails, the dev is removed @@ -1887,13 +1758,8 @@ int __init net_dev_init(void) dev_boot_phase = 0; - dst_init(); dev_mcast_init(); -#ifdef CONFIG_NET_SCHED - pktsched_init(); -#endif - /* * Initialise network devices */ @@ -1920,36 +1786,6 @@ inline int init_tx_header(u8 *data, unsigned int len, struct net_device *dev) } -/* - * tx_skb_release - * - * skb destructor function that is attached to zero-copy tx skbs before - * they are passed to the device driver for transmission. The destructor - * is responsible for unlinking the fragment pointer to the skb data that - * is in guest memory, and decrementing the tot_count on the packet pages - * pfn_info. - */ - -void tx_skb_release(struct sk_buff *skb) -{ - int i; - - for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ ) - skb_shinfo(skb)->frags[i].page->tot_count--; - - skb_shinfo(skb)->nr_frags = 0; - - /* - * XXX This assumes that, per vif, SKBs are processed in-order! - * Also, like lots of code in here -- we assume direct access to the - * consumer and producer indexes. This is likely safe for the - * forseeable future. - */ - sys_vif_list[skb->src_vif]->net_ring->tx_cons = - TX_RING_INC(sys_vif_list[skb->src_vif]->net_ring->tx_cons); -} - - /* * do_net_update: * @@ -1957,12 +1793,8 @@ void tx_skb_release(struct sk_buff *skb) * descriptor rings. */ -/* Ethernet + IP headers */ -#define PKT_PROT_LEN (ETH_HLEN + 20) - long do_net_update(void) { - shared_info_t *shared = current->shared_info; net_ring_t *net_ring; net_shadow_ring_t *shadow_ring; net_vif_t *current_vif; @@ -1988,16 +1820,20 @@ long do_net_update(void) * PHASE 1 -- TRANSMIT RING */ - for ( i = shadow_ring->tx_cons; + for ( i = shadow_ring->tx_prod; i != net_ring->tx_prod; i = TX_RING_INC(i) ) { if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) ) { DPRINTK("Bad copy_from_user for tx net descriptor\n"); + shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU; continue; } + shadow_ring->tx_ring[i].size = tx.size; + shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE; + if ( tx.size < PKT_PROT_LEN ) { DPRINTK("Runt packet %ld\n", tx.size); @@ -2010,41 +1846,35 @@ long do_net_update(void) tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size); continue; } - - if ( TX_RING_INC(i) == net_ring->tx_event ) + + pfn = tx.addr >> PAGE_SHIFT; + page = frame_table + pfn; + if ( (pfn >= max_page) || + ((page->flags & PG_domain_mask) != current->domain) ) { - set_bit(_EVENT_NET_TX, &shared->events); + DPRINTK("Bad page frame\n"); + continue; } - - /* - * Map the skb in from the guest, and get it's delivery target. - * We need this to know whether the packet is to be sent locally - * or remotely. - */ g_data = map_domain_mem(tx.addr); protocol = __constant_htons( init_tx_header(g_data, tx.size, the_dev)); if ( protocol == 0 ) - { - unmap_domain_mem(g_data); - continue; - } + goto unmap_and_continue; target = __net_get_target_vif(g_data, tx.size, current_vif->id); - if (target > VIF_PHYSICAL_INTERFACE ) + if ( target > VIF_PHYSICAL_INTERFACE ) { /* Local delivery */ - skb = dev_alloc_skb(tx.size); - - if (skb == NULL) - { - unmap_domain_mem(g_data); - continue; - } + if ( (skb = dev_alloc_skb(tx.size)) == NULL ) + goto unmap_and_continue; + skb->destructor = tx_skb_release; + + shadow_ring->tx_ring[i].status = RING_STATUS_OK; + skb->src_vif = current_vif->id; skb->dst_vif = target; skb->protocol = protocol; @@ -2058,52 +1888,26 @@ long do_net_update(void) unmap_domain_mem(skb->head); skb->data += ETH_HLEN; (void)netif_rx(skb); - unmap_domain_mem(g_data); } else if ( target == VIF_PHYSICAL_INTERFACE ) { - /* - * External delivery: create a fragmented SKB, consisting of a - * small copied section for the header, then a reference to the - * in-place payload. - */ - skb = alloc_skb(PKT_PROT_LEN, GFP_KERNEL); - if (skb == NULL) - continue; - - skb_put(skb, PKT_PROT_LEN); - memcpy(skb->data, g_data, PKT_PROT_LEN); - unmap_domain_mem(g_data); - - skb->dev = the_dev; - skb->src_vif = current_vif->id; - skb->dst_vif = target; - skb->protocol = protocol; - skb->mac.raw=skb->data; - - /* One more reference to guest page for duration of transfer */ - page = (tx.addr >> PAGE_SHIFT) + frame_table; - page->tot_count++; - - /* We have special destructor to deal with guest frag. */ - skb->destructor = &tx_skb_release; - - skb_shinfo(skb)->frags[0].page = page; - skb_shinfo(skb)->frags[0].size = tx.size - PKT_PROT_LEN; - skb_shinfo(skb)->frags[0].page_offset - = (tx.addr & ~PAGE_MASK) + PKT_PROT_LEN; - skb_shinfo(skb)->nr_frags = 1; - skb->data_len = tx.size - skb->len; - skb->len = tx.size; - - dev_queue_xmit(skb); - } - else - { - unmap_domain_mem(g_data); + shadow_ring->tx_ring[i].header = + kmem_cache_alloc(net_header_cachep, GFP_KERNEL); + if ( shadow_ring->tx_ring[i].header == NULL ) + goto unmap_and_continue; + memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN); + shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN; + shadow_ring->tx_ring[i].status = RING_STATUS_OK; + get_page_tot(page); } + + unmap_and_continue: + unmap_domain_mem(g_data); } - shadow_ring->tx_cons = i; + smp_wmb(); /* Let other CPUs see new descriptors first. */ + shadow_ring->tx_prod = i; + add_to_net_schedule_list_tail(current_vif); + tasklet_schedule(&net_tx_tasklet); /* XXX */ /* * PHASE 2 -- RECEIVE RING @@ -2131,9 +1935,10 @@ long do_net_update(void) shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE; - if ( page->flags != (PGT_l1_page_table | current->domain) ) + if ( (pfn >= max_page) || + (page->flags != (PGT_l1_page_table | current->domain)) ) { - DPRINTK("Bad page flags\n"); + DPRINTK("Bad page frame containing ppte\n"); continue; } @@ -2175,7 +1980,7 @@ int setup_network_devices(void) int ret; extern char opt_ifname[]; struct net_device *dev = dev_get_by_name(opt_ifname); - + if ( dev == NULL ) { printk("Could not find device %s\n", opt_ifname); @@ -2191,6 +1996,8 @@ int setup_network_devices(void) printk("Device %s opened and ready for use.\n", opt_ifname); the_dev = dev; + tasklet_enable(&net_tx_tasklet); + return 1; } diff --git a/xen-2.4.16/net/devinit.c b/xen-2.4.16/net/devinit.c index 7770a0e7a4..f3ce2c39d4 100644 --- a/xen-2.4.16/net/devinit.c +++ b/xen-2.4.16/net/devinit.c @@ -97,11 +97,6 @@ void dev_activate(struct net_device *dev) void dev_deactivate(struct net_device *dev) { dev_watchdog_down(dev); - - while (test_bit(__LINK_STATE_SCHED, &dev->state)) { - current->policy |= SCHED_YIELD; - schedule(); - } } void dev_init_scheduler(struct net_device *dev) diff --git a/xen-2.4.16/net/skbuff.c b/xen-2.4.16/net/skbuff.c index 0a4f2639e8..695a6f6b63 100644 --- a/xen-2.4.16/net/skbuff.c +++ b/xen-2.4.16/net/skbuff.c @@ -32,10 +32,6 @@ * 2 of the License, or (at your option) any later version. */ -/* - * The functions in this file will not compile correctly with gcc 2.4.x - */ - #include <linux/config.h> #include <linux/lib.h> #include <linux/errno.h> @@ -55,16 +51,13 @@ #define BUG_TRAP ASSERT -#define put_page(_p) ((void)0) /* XXXX KAF */ -#define get_page(_p) ((void)0) - int sysctl_hot_list_len = 128; static kmem_cache_t *skbuff_head_cache; static union { - struct sk_buff_head list; - char pad[SMP_CACHE_BYTES]; + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; } skb_head_pool[NR_CPUS]; /* @@ -84,9 +77,9 @@ static union { void skb_over_panic(struct sk_buff *skb, int sz, void *here) { - printk("skput:over: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); - BUG(); + printk("skput:over: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); + BUG(); } /** @@ -101,148 +94,90 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) void skb_under_panic(struct sk_buff *skb, int sz, void *here) { - printk("skput:under: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); - BUG(); + printk("skput:under: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); + BUG(); } static __inline__ struct sk_buff *skb_head_from_pool(void) { - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; - - if (skb_queue_len(list)) { - struct sk_buff *skb; - unsigned long flags; - - local_irq_save(flags); - skb = __skb_dequeue(list); - local_irq_restore(flags); - return skb; - } - return NULL; + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) { + struct sk_buff *skb; + unsigned long flags; + + local_irq_save(flags); + skb = __skb_dequeue(list); + local_irq_restore(flags); + return skb; + } + return NULL; } static __inline__ void skb_head_to_pool(struct sk_buff *skb) { - struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; - if (skb_queue_len(list) < sysctl_hot_list_len) { - unsigned long flags; + if (skb_queue_len(list) < sysctl_hot_list_len) { + unsigned long flags; - local_irq_save(flags); - __skb_queue_head(list, skb); - local_irq_restore(flags); + local_irq_save(flags); + __skb_queue_head(list, skb); + local_irq_restore(flags); - return; - } - kmem_cache_free(skbuff_head_cache, skb); + return; + } + kmem_cache_free(skbuff_head_cache, skb); } static inline u8 *alloc_skb_data_page(struct sk_buff *skb) { - struct list_head *list_ptr; - struct pfn_info *pf; - unsigned long flags; + struct list_head *list_ptr; + struct pfn_info *pf; + unsigned long flags; - spin_lock_irqsave(&free_list_lock, flags); + spin_lock_irqsave(&free_list_lock, flags); - if (!free_pfns) return NULL; + if (!free_pfns) return NULL; - list_ptr = free_list.next; - pf = list_entry(list_ptr, struct pfn_info, list); - pf->flags = 0; /* owned by dom0 */ - list_del(&pf->list); - free_pfns--; + list_ptr = free_list.next; + pf = list_entry(list_ptr, struct pfn_info, list); + pf->flags = 0; /* owned by dom0 */ + list_del(&pf->list); + free_pfns--; - spin_unlock_irqrestore(&free_list_lock, flags); + spin_unlock_irqrestore(&free_list_lock, flags); - skb->pf = pf; - return (u8 *)((pf - frame_table) << PAGE_SHIFT); + skb->pf = pf; + return (u8 *)((pf - frame_table) << PAGE_SHIFT); } static inline void dealloc_skb_data_page(struct sk_buff *skb) { - struct pfn_info *pf; - unsigned long flags; + struct pfn_info *pf; + unsigned long flags; - pf = skb->pf; + pf = skb->pf; - spin_lock_irqsave(&free_list_lock, flags); + spin_lock_irqsave(&free_list_lock, flags); - list_add(&pf->list, &free_list); - free_pfns++; + list_add(&pf->list, &free_list); + free_pfns++; - spin_unlock_irqrestore(&free_list_lock, flags); + spin_unlock_irqrestore(&free_list_lock, flags); } -struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) +static inline void INTERRUPT_CHECK(int gfp_mask) { - struct sk_buff *skb; - u8 *data; - - if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { - static int count = 0; - if (++count < 5) { - printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", NET_CALLER(size)); - BUG(); - } - gfp_mask &= ~__GFP_WAIT; - } - - /* Get the HEAD */ - skb = skb_head_from_pool(); - if (skb == NULL) { - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); - if (skb == NULL) - goto nohead; - } - - /* Get the DATA. Size must match skb_add_mtu(). */ - size = SKB_DATA_ALIGN(size); - data = alloc_skb_data_page(skb); - - if (data == NULL) - goto nodata; - - /* A FAKE virtual address, so that pci_map_xxx dor the right thing. */ - data = phys_to_virt((unsigned long)data); - - /* Load the data pointers. */ - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - /* Set up other state */ - skb->len = 0; - skb->cloned = 0; - skb->data_len = 0; - skb->src_vif = VIF_UNKNOWN_INTERFACE; - skb->dst_vif = VIF_UNKNOWN_INTERFACE; - skb->skb_type = SKB_ZERO_COPY; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - return skb; - -nodata: - skb_head_to_pool(skb); -nohead: - return NULL; + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { + printk(KERN_ERR "alloc_skb called nonatomically\n"); + BUG(); + } } -/* Allocate a new skbuff. We do this ourselves so we can fill in a few - * 'private' fields and also do memory statistics to find all the - * [BEEP] leaks. - * - */ - /** * alloc_skb - allocate a network buffer * @size: size to allocate @@ -258,136 +193,150 @@ nohead: struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) { - struct sk_buff *skb; - u8 *data; - - if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { - static int count = 0; - if (++count < 5) { - printk(KERN_ERR "alloc_skb called nonatomically " - "from interrupt %p\n", NET_CALLER(size)); - BUG(); - } - gfp_mask &= ~__GFP_WAIT; - } - - /* Get the HEAD */ - skb = skb_head_from_pool(); - if (skb == NULL) { - skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); - if (skb == NULL) - goto nohead; - } - - /* Get the DATA. Size must match skb_add_mtu(). */ - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - goto nodata; - - /* Load the data pointers. */ - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - /* Set up other state */ - skb->len = 0; - skb->cloned = 0; - skb->data_len = 0; - skb->src_vif = VIF_UNKNOWN_INTERFACE; - skb->dst_vif = VIF_UNKNOWN_INTERFACE; - skb->skb_type = SKB_NORMAL; - - atomic_set(&skb->users, 1); - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - return skb; - -nodata: - skb_head_to_pool(skb); -nohead: - return NULL; + struct sk_buff *skb; + u8 *data; + + INTERRUPT_CHECK(gfp_mask); + + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + goto nohead; + } + + /* Get the DATA. Size must match skb_add_mtu(). */ + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + goto nodata; + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->data_len = 0; + skb->src_vif = VIF_UNKNOWN_INTERFACE; + skb->dst_vif = VIF_UNKNOWN_INTERFACE; + skb->skb_type = SKB_NORMAL; + + skb_shinfo(skb)->nr_frags = 0; + return skb; + + nodata: + skb_head_to_pool(skb); + nohead: + return NULL; } -/* - * Slab constructor for a skb head. - */ -static inline void skb_headerinit(void *p, kmem_cache_t *cache, - unsigned long flags) +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) { - struct sk_buff *skb = p; - - skb->next = NULL; - skb->prev = NULL; - skb->list = NULL; - skb->dev = NULL; - skb->pkt_type = PACKET_HOST; /* Default type */ - skb->ip_summed = 0; - skb->destructor = NULL; - -#ifdef CONFIG_NETFILTER - skb->nfmark = skb->nfcache = 0; - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -#endif -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#endif + struct sk_buff *skb; + u8 *data; + + INTERRUPT_CHECK(gfp_mask); + + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + goto nohead; + } + + /* Get the DATA. Size must match skb_add_mtu(). */ + size = SKB_DATA_ALIGN(size); + data = alloc_skb_data_page(skb); + + if (data == NULL) + goto nodata; + + /* A FAKE virtual address, so that pci_map_xxx dor the right thing. */ + data = phys_to_virt((unsigned long)data); + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->data_len = 0; + skb->src_vif = VIF_UNKNOWN_INTERFACE; + skb->dst_vif = VIF_UNKNOWN_INTERFACE; + skb->skb_type = SKB_ZERO_COPY; + + skb_shinfo(skb)->nr_frags = 0; + + return skb; + + nodata: + skb_head_to_pool(skb); + nohead: + return NULL; } -static void skb_drop_fraglist(struct sk_buff *skb) + +struct sk_buff *alloc_skb_nodata(int gfp_mask) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; + struct sk_buff *skb; - skb_shinfo(skb)->frag_list = NULL; + INTERRUPT_CHECK(gfp_mask); - do { - struct sk_buff *this = list; - list = list->next; - kfree_skb(this); - } while (list); + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + return NULL; + } + + skb->skb_type = SKB_NODATA; + return skb; } -static void skb_clone_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list; - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) - skb_get(list); +/* + * Slab constructor for a skb head. + */ +static inline void skb_headerinit(void *p, kmem_cache_t *cache, + unsigned long flags) +{ + struct sk_buff *skb = p; + + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + skb->dev = NULL; + skb->pkt_type = PACKET_HOST; /* Default type */ + skb->ip_summed = 0; + skb->destructor = NULL; } static void skb_release_data(struct sk_buff *skb) { - - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - if (skb->skb_type == SKB_NORMAL) - { - kfree(skb->head); - } - else if (skb->skb_type == SKB_ZERO_COPY) - { - dealloc_skb_data_page(skb); - } - else - { - BUG(); - } - } + if (skb_shinfo(skb)->nr_frags) BUG(); + + switch ( skb->skb_type ) + { + case SKB_NORMAL: + kfree(skb->head); + break; + case SKB_ZERO_COPY: + dealloc_skb_data_page(skb); + break; + case SKB_NODATA: + break; + default: + BUG(); + } } /* @@ -395,8 +344,8 @@ static void skb_release_data(struct sk_buff *skb) */ void kfree_skbmem(struct sk_buff *skb) { - skb_release_data(skb); - skb_head_to_pool(skb); + skb_release_data(skb); + skb_head_to_pool(skb); } /** @@ -410,124 +359,32 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - if (skb->list) { - printk(KERN_WARNING "Warning: kfree_skb passed an skb still " - "on a list (from %p).\n", NET_CALLER(skb)); - BUG(); - } - - if(skb->destructor) { - if (in_irq()) { - printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", - NET_CALLER(skb)); - } - skb->destructor(skb); - } - -#ifdef CONFIG_NETFILTER - nf_conntrack_put(skb->nfct); -#endif - skb_headerinit(skb, NULL, 0); /* clean state */ - kfree_skbmem(skb); -} + if ( skb->list ) + panic(KERN_WARNING "Warning: kfree_skb passed an skb still " + "on a list (from %p).\n", NET_CALLER(skb)); -/** - * skb_clone - duplicate an sk_buff - * @skb: buffer to clone - * @gfp_mask: allocation priority - * - * Duplicate an &sk_buff. The new one is not owned by a socket. Both - * copies share the same packet data but not structure. The new - * buffer has a reference count of 1. If the allocation fails the - * function returns %NULL otherwise the new buffer is returned. - * - * If this function is called from an interrupt gfp_mask() must be - * %GFP_ATOMIC. - */ + if ( skb->destructor ) + skb->destructor(skb); -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - - n = skb_head_from_pool(); - if (!n) { - n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - if (!n) - return NULL; - } - -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->list = NULL; - C(dev); - C(h); - C(nh); - C(mac); - C(len); - C(data_len); - C(csum); - n->cloned = 1; - C(pkt_type); - C(ip_summed); - atomic_set(&n->users, 1); - C(protocol); - C(head); - C(data); - C(tail); - C(end); - n->destructor = NULL; -#ifdef CONFIG_NETFILTER - C(nfmark); - C(nfcache); - C(nfct); -#ifdef CONFIG_NETFILTER_DEBUG - C(nf_debug); -#endif -#endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#endif - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; -#ifdef CONFIG_NETFILTER - nf_conntrack_get(skb->nfct); -#endif - return n; + skb_headerinit(skb, NULL, 0); /* clean state */ + kfree_skbmem(skb); } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; - - new->list=NULL; - new->dev=old->dev; - new->protocol=old->protocol; - new->h.raw=old->h.raw+offset; - new->nh.raw=old->nh.raw+offset; - new->mac.raw=old->mac.raw+offset; - atomic_set(&new->users, 1); - new->pkt_type=old->pkt_type; - new->destructor = NULL; -#ifdef CONFIG_NETFILTER - new->nfmark=old->nfmark; - new->nfcache=old->nfcache; - new->nfct=old->nfct; - nf_conntrack_get(new->nfct); -#ifdef CONFIG_NETFILTER_DEBUG - new->nf_debug=old->nf_debug; -#endif -#endif -#ifdef CONFIG_NET_SCHED - new->tc_index = old->tc_index; -#endif + /* + * Shift between the two data areas in bytes + */ + unsigned long offset = new->data - old->data; + + new->list=NULL; + new->dev=old->dev; + new->protocol=old->protocol; + new->h.raw=old->h.raw+offset; + new->nh.raw=old->nh.raw+offset; + new->mac.raw=old->mac.raw+offset; + new->pkt_type=old->pkt_type; + new->destructor = NULL; } /** @@ -549,748 +406,96 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) { - struct sk_buff *n; - int headerlen = skb->data-skb->head; - - /* - * Allocate the copy buffer - */ - n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); - if(n==NULL) - return NULL; - - /* Set the data pointer */ - skb_reserve(n,headerlen); - /* Set the tail pointer and length */ - skb_put(n,skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) - BUG(); - - copy_skb_header(n, skb); - - return n; -} - -/* Keep head the same: replace data */ -int skb_linearize(struct sk_buff *skb, int gfp_mask) -{ - unsigned int size; - u8 *data; - long offset; - int headerlen = skb->data - skb->head; - int expand = (skb->tail+skb->data_len) - skb->end; - - if (skb_shared(skb)) - BUG(); - - if (expand <= 0) - expand = 0; - - size = (skb->end - skb->head + expand); - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - return -ENOMEM; - - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) - BUG(); - - /* Offset between the two in bytes */ - offset = data - skb->head; - - /* Free old data. */ - skb_release_data(skb); - - skb->head = data; - skb->end = data + size; - - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; - - /* Set up shinfo */ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; - - skb->tail += skb->data_len; - skb->data_len = 0; - return 0; -} - - -/** - * pskb_copy - create copy of an sk_buff with private head. - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and part of its data, located - * in header. Fragmented data remain shared. This is used when - * the caller wishes to modify only header of &sk_buff and needs - * private copy of the header to alter. Returns %NULL on failure - * or the pointer to the buffer on success. - * The returned buffer has a reference count of 1. - */ - -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n; - - /* - * Allocate the copy buffer - */ - n=alloc_skb(skb->end - skb->head, gfp_mask); - if(n==NULL) - return NULL; - - /* Set the data pointer */ - skb_reserve(n,skb->data-skb->head); - /* Set the tail pointer and length */ - skb_put(n,skb_headlen(skb)); - /* Copy the bytes */ - memcpy(n->data, skb->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - n->data_len = skb->data_len; - n->len = skb->len; - - if (skb_shinfo(skb)->nr_frags) { - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; - } - - if (skb_shinfo(skb)->frag_list) { - skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; - skb_clone_fraglist(n); - } - - copy_skb_header(n, skb); - - return n; -} - -/** - * pskb_expand_head - reallocate header of &sk_buff - * @skb: buffer to reallocate - * @nhead: room to add at head - * @ntail: room to add at tail - * @gfp_mask: allocation priority - * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have - * reference count of 1. Returns zero in the case of success or error, - * if expansion failed. In the last case, &sk_buff is not changed. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) -{ - int i; - u8 *data; - int size = nhead + (skb->end - skb->head) + ntail; - long off; - - if (skb_shared(skb)) - BUG(); - - size = SKB_DATA_ALIGN(size); - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (data == NULL) - goto nodata; - - /* Copy only real data... and, alas, header. This should be - * optimized for the cases when header is void. */ - memcpy(data+nhead, skb->head, skb->tail-skb->head); - memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - - skb_release_data(skb); - - off = (data+nhead) - skb->head; - - skb->head = data; - skb->end = data+size; - - skb->data += off; - skb->tail += off; - skb->mac.raw += off; - skb->h.raw += off; - skb->nh.raw += off; - skb->cloned = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); - return 0; - -nodata: - return -ENOMEM; -} - -/* Make private copy of skb with writable head and some headroom */ - -struct sk_buff * -skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -{ - struct sk_buff *skb2; - int delta = headroom - skb_headroom(skb); - - if (delta <= 0) - return pskb_copy(skb, GFP_ATOMIC); - - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL || - !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) - return skb2; - - kfree_skb(skb2); - return NULL; -} - - -/** - * skb_copy_expand - copy and expand sk_buff - * @skb: buffer to copy - * @newheadroom: new free bytes at head - * @newtailroom: new free bytes at tail - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data and while doing so - * allocate additional space. - * - * This is used when the caller wishes to modify the data and needs a - * private copy of the data to alter as well as more space for new fields. - * Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * You must pass %GFP_ATOMIC as the allocation priority if this function - * is called from an interrupt. - */ - - -struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, - int newtailroom, - int gfp_mask) -{ - struct sk_buff *n; - - /* - * Allocate the copy buffer - */ - - n=alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); - if(n==NULL) - return NULL; - - skb_reserve(n,newheadroom); - - /* Set the tail pointer and length */ - skb_put(n,skb->len); - - /* Copy the data only. */ - if (skb_copy_bits(skb, 0, n->data, skb->len)) - BUG(); - - copy_skb_header(n, skb); - return n; -} + struct sk_buff *n; + int headerlen = skb->data-skb->head; + + /* + * Allocate the copy buffer + */ + n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); + if(n==NULL) + return NULL; -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). - */ + /* Set the data pointer */ + skb_reserve(n,headerlen); + /* Set the tail pointer and length */ + skb_put(n,skb->len); + n->csum = skb->csum; + n->ip_summed = skb->ip_summed; -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) -{ - int offset = skb_headlen(skb); - int nfrags = skb_shinfo(skb)->nr_frags; - int i; - - for (i=0; i<nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - if (!realloc) - BUG(); - if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len-offset; - } - } - offset = end; - } - - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; - } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } - } - - return 0; -} + if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) + BUG(); -/** - * __pskb_pull_tail - advance tail of skb header - * @skb: buffer to reallocate - * @delta: number of bytes to advance tail - * - * The function makes a sense only on a fragmented &sk_buff, - * it expands header moving its tail forward and copying necessary - * data from fragmented part. - * - * &sk_buff MUST have reference count of 1. - * - * Returns %NULL (and &sk_buff does not change) if pull failed - * or value of new tail of skb in the case of success. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ + copy_skb_header(n, skb); -/* Moves tail of skb head forward, copying data from fragmented part, - * when it is necessary. - * 1. It may fail due to malloc failure. - * 2. It may change skb pointers. - * - * It is pretty complicated. Luckily, it is called only in exceptional cases. - */ -unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta) -{ - int i, k, eat; - - /* If skb has not enough free space at tail, get new one - * plus 128 bytes for future expansions. If we have enough - * room at tail, reallocate without expansion only if skb is cloned. - */ - eat = (skb->tail+delta) - skb->end; - - if (eat > 0 || skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC)) - return NULL; - } - - if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) - BUG(); - - /* Optimization: no fragments, no reasons to preestimate - * size of pulled pages. Superb. - */ - if (skb_shinfo(skb)->frag_list == NULL) - goto pull_pages; - - /* Estimate size of pulled pages. */ - eat = delta; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) - goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; - } - - /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, - * but taking into account that pulling is expected to - * be very rare operation, it is worth to fight against - * further bloating skb head and crucify ourselves here instead. - * Pure masohism, indeed. 8)8) - */ - if (eat) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - struct sk_buff *clone = NULL; - struct sk_buff *insp = NULL; - - do { - if (list == NULL) - BUG(); - - if (list->len <= eat) { - /* Eaten as whole. */ - eat -= list->len; - list = list->next; - insp = list; - } else { - /* Eaten partially. */ - - if (skb_shared(list)) { - /* Sucks! We need to fork list. :-( */ - clone = skb_clone(list, GFP_ATOMIC); - if (clone == NULL) - return NULL; - insp = list->next; - list = clone; - } else { - /* This may be pulled without - * problems. */ - insp = list; - } - if (pskb_pull(list, eat) == NULL) { - if (clone) - kfree_skb(clone); - return NULL; - } - break; - } - } while (eat); - - /* Free pulled out fragments. */ - while ((list = skb_shinfo(skb)->frag_list) != insp) { - skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); - } - /* And insert new clone at head. */ - if (clone) { - clone->next = list; - skb_shinfo(skb)->frag_list = clone; - } - } - /* Success! Now we may commit changes to skb data. */ - -pull_pages: - eat = delta; - k = 0; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; - } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; - if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; - eat = 0; - } - k++; - } - } - skb_shinfo(skb)->nr_frags = k; - - skb->tail += delta; - skb->data_len -= delta; - - return skb->tail; + return n; } /* Copy some data bits from skb to kernel buffer. */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { - int i, copy; - int start = skb->len - skb->data_len; - - if (offset > (int)skb->len-len) - goto fault; - - /* Copy header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - memcpy(to, skb->data + offset, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); - memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ - offset-start, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset-start, to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - } - if (len == 0) - return 0; - -fault: - return -EFAULT; -} - -/* Checksum skb data. */ - -#if 0 - -unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) -{ - int i, copy; - int start = skb->len - skb->data_len; - int pos = 0; - - /* Checksum header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - csum = csum_partial(skb->data+offset, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos = copy; - } - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial(vaddr + frag->page_offset + - offset-start, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - unsigned int csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset-start, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - } - if (len == 0) - return csum; - - BUG(); - return csum; -} - -/* Both of above in one bottle. */ - -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) -{ - int i, copy; - int start = skb->len - skb->data_len; - int pos = 0; - - /* Copy header. */ - if ((copy = start-offset) > 0) { - if (copy > len) - copy = len; - csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos = copy; - } - - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset+len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end-offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + - offset-start, to, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { - unsigned int csum2; - int end; - - BUG_TRAP(start <= offset+len); - - end = start + list->len; - if ((copy = end-offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - } - if (len == 0) - return csum; - - BUG(); - return csum; -} - -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -{ - unsigned int csum; - long csstart; - - if (skb->ip_summed == CHECKSUM_HW) - csstart = skb->h.raw - skb->data; - else - csstart = skb->len - skb->data_len; - - if (csstart > skb->len - skb->data_len) - BUG(); - - memcpy(to, skb->data, csstart); - - csum = 0; - if (csstart != skb->len) - csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, - skb->len-csstart, 0); - - if (skb->ip_summed == CHECKSUM_HW) { - long csstuff = csstart + skb->csum; - - *((unsigned short *)(to + csstuff)) = csum_fold(csum); - } -} - -#endif + int i, copy; + int start = skb->len - skb->data_len; + + if (offset > (int)skb->len-len) + goto fault; + + /* Copy header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + memcpy(to, skb->data + offset, copy); + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + u8 *vaddr; + + if (copy > len) + copy = len; + + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ + offset-start, copy); + kunmap_skb_frag(vaddr); + + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + start = end; + } -#if 0 -/* - * Tune the memory allocator for a new MTU size. - */ -void skb_add_mtu(int mtu) -{ - /* Must match allocation in alloc_skb */ - mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); + if (len == 0) + return 0; - kmem_add_cache_size(mtu); + fault: + return -EFAULT; } -#endif void __init skb_init(void) { - int i; - - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", - sizeof(struct sk_buff), - 0, - SLAB_HWCACHE_ALIGN, - skb_headerinit, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); - - for (i=0; i<NR_CPUS; i++) - skb_queue_head_init(&skb_head_pool[i].list); + int i; + + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", + sizeof(struct sk_buff), + 0, + SLAB_HWCACHE_ALIGN, + skb_headerinit, NULL); + if (!skbuff_head_cache) + panic("cannot create skbuff cache"); + + for (i=0; i<NR_CPUS; i++) + skb_queue_head_init(&skb_head_pool[i].list); } |