diff options
author | akw27@boulderdash.cl.cam.ac.uk <akw27@boulderdash.cl.cam.ac.uk> | 2003-02-14 14:27:45 +0000 |
---|---|---|
committer | akw27@boulderdash.cl.cam.ac.uk <akw27@boulderdash.cl.cam.ac.uk> | 2003-02-14 14:27:45 +0000 |
commit | ed220108094cf8e5c3f70a7421182cdbcb59f3d3 (patch) | |
tree | 22722275b0a231a057b08d99825b24d551545396 /xenolinux-2.4.16-sparse | |
parent | ce62d9e7f6d76422b025939279a2a51b5262a06d (diff) | |
parent | 77cf55f0b8964f4d82d7119bfe59db8fddf4b320 (diff) | |
download | xen-ed220108094cf8e5c3f70a7421182cdbcb59f3d3.tar.gz xen-ed220108094cf8e5c3f70a7421182cdbcb59f3d3.tar.bz2 xen-ed220108094cf8e5c3f70a7421182cdbcb59f3d3.zip |
bitkeeper revision 1.34 (3e4cfce1Ot4YJzJ4jwNjDthlFrZV5w)
conflict resolution.
Diffstat (limited to 'xenolinux-2.4.16-sparse')
-rw-r--r-- | xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c | 127 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/include/asm-xeno/io.h | 7 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/include/linux/skbuff.h | 1186 | ||||
-rw-r--r-- | xenolinux-2.4.16-sparse/net/core/skbuff.c | 1373 |
4 files changed, 2666 insertions, 27 deletions
diff --git a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c index b018e29e49..87b1a97fdc 100644 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c @@ -21,14 +21,12 @@ #include <linux/skbuff.h> #include <linux/init.h> +#include <asm/io.h> #include <net/sock.h> #define NET_TX_IRQ _EVENT_NET_TX #define NET_RX_IRQ _EVENT_NET_RX -#define NET_TX_IRQ_FOR_VIF(x) _EVENT_NET_TX_FOR_VIF(x) -#define NET_RX_IRQ_FOR_VIF(x) _EVENT_NET_RX_FOR_VIF(x) - #define TX_MAX_ENTRIES (TX_RING_SIZE - 2) #define RX_MAX_ENTRIES (RX_RING_SIZE - 2) @@ -37,7 +35,7 @@ #define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) #define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) -#define RX_BUF_SIZE 1600 /* Ethernet MTU + plenty of slack! */ +#define RX_BUF_SIZE 2049 /* (was 1600) Ethernet MTU + plenty of slack! */ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs); static void network_tx_int(int irq, void *dev_id, struct pt_regs *ptregs); @@ -48,6 +46,8 @@ static void cleanup_module(void); static struct list_head dev_list; +static unsigned int net_countx; + /* * RX RING: RX_IDX <= rx_cons <= rx_prod * TX RING: TX_IDX <= tx_cons <= tx_prod @@ -72,16 +72,8 @@ struct net_private static int network_open(struct net_device *dev) { struct net_private *np = dev->priv; - int error; - char *rxlabel, *txlabel; - - // This is inevitably not the right way to allocate a couple of static strings. - rxlabel = kmalloc(sizeof("net-rx- "), GFP_KERNEL); - txlabel = kmalloc(sizeof("net-tx- "), GFP_KERNEL); - if ((rxlabel == NULL) || (txlabel == NULL)) goto fail; - sprintf(rxlabel, "net-rx-%d", np->id); - sprintf(txlabel, "net-tx-%d", np->id); - + int error = 0; + np->rx_idx = np->tx_idx = np->tx_full = 0; memset(&np->stats, 0, sizeof(np->stats)); @@ -113,8 +105,8 @@ static int network_open(struct net_device *dev) network_alloc_rx_buffers(dev); - error = request_irq(NET_RX_IRQ_FOR_VIF(np->id), network_rx_int, 0, - rxlabel, dev); + error = request_irq(NET_RX_IRQ, network_rx_int, 0, + "net-rx", dev); if ( error ) { printk(KERN_WARNING "%s: Could not allocate receive interrupt\n", @@ -122,13 +114,13 @@ static int network_open(struct net_device *dev) goto fail; } - error = request_irq(NET_TX_IRQ_FOR_VIF(np->id), network_tx_int, 0, - txlabel, dev); + error = request_irq(NET_TX_IRQ, network_tx_int, 0, + "net-tx", dev); if ( error ) { printk(KERN_WARNING "%s: Could not allocate transmit interrupt\n", dev->name); - free_irq(NET_RX_IRQ_FOR_VIF(np->id), dev); + free_irq(NET_RX_IRQ, dev); goto fail; } @@ -141,8 +133,6 @@ static int network_open(struct net_device *dev) return 0; fail: - if ( rxlabel ) kfree(rxlabel); - if ( txlabel ) kfree(txlabel); if ( np->net_ring->rx_ring ) kfree(np->net_ring->rx_ring); if ( np->net_ring->tx_ring ) kfree(np->net_ring->tx_ring); if ( np->rx_skb_ring ) kfree(np->rx_skb_ring); @@ -179,13 +169,59 @@ static void network_tx_buf_gc(struct net_device *dev) spin_unlock_irqrestore(&np->tx_lock, flags); } +inline unsigned long get_ppte(unsigned long addr) +{ + unsigned long ppte = 0xdeadbeef; + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; + pgd = pgd_offset_k(addr); + + if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG(); + + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd)) BUG(); + if (pmd_bad(*pmd)) BUG(); + + ptep = pte_offset(pmd, addr); + ppte = (unsigned long)phys_to_machine(virt_to_phys(ptep)); + + return ppte; +} +/* +static void validate_free_list(void) +{ + unsigned long addr, ppfn, mpfn, mpfn2, flags; + struct list_head *i; + struct net_page_info *np; + + printk(KERN_ALERT "Walking free pages:\n"); + + spin_lock_irqsave(&net_page_list_lock, flags); + + list_for_each(i, &net_page_list) + { + np = list_entry(i, struct net_page_info, list); + addr = np->virt_addr; + ppfn = virt_to_phys(addr) >> PAGE_SHIFT; + mpfn = get_ppte(addr); + mpfn2 = phys_to_machine_mapping[ppfn]; + + mpfn = (*(unsigned long *)phys_to_virt(machine_to_phys(mpfn))) >> PAGE_SHIFT; + if (mpfn != mpfn2) printk(KERN_ALERT "mpfn %lu != %lu\n", mpfn, mpfn2); + if (machine_to_phys_mapping[mpfn] != ppfn) printk(KERN_ALERT "ppfn %lu != %lu\n", machine_to_phys_mapping[mpfn], ppfn); + } + + spin_unlock_irqrestore(&net_page_list_lock, flags); + +} +*/ static void network_alloc_rx_buffers(struct net_device *dev) { unsigned int i; struct net_private *np = dev->priv; struct sk_buff *skb; unsigned int end = RX_RING_ADD(np->rx_idx, RX_MAX_ENTRIES); + for ( i = np->net_ring->rx_prod; i != end; i = RX_RING_INC(i) ) { @@ -194,8 +230,9 @@ static void network_alloc_rx_buffers(struct net_device *dev) skb->dev = dev; skb_reserve(skb, 2); /* word align the IP header */ np->rx_skb_ring[i] = skb; - np->net_ring->rx_ring[i].addr = (unsigned long)skb->data; + np->net_ring->rx_ring[i].addr = get_ppte(skb->head); np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */ +//printk(KERN_ALERT "[%p]\n", phys_to_machine(virt_to_phys(skb->page_ptr))); } np->net_ring->rx_prod = i; @@ -219,6 +256,14 @@ static void network_free_rx_buffers(struct net_device *dev) } } +void print_range(u8 *start, unsigned int len) +{ + int i = 0; + + while (i++ < len) + printk("%x:", start[i]); + printk("\n"); +} static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -231,10 +276,23 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); return -ENOBUFS; } - +//print_range(skb->data, ETH_HLEN + 8); +//print_range(skb->data + ETH_HLEN + 8, 20); +//printk("skb->len is %u in guestOS (expected fraglen: %u).\n", skb->len, skb->len - (ETH_HLEN + 8)); i = np->net_ring->tx_prod; + + if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE ) + { + struct sk_buff *new_skb = alloc_skb(RX_BUF_SIZE, GFP_KERNEL); + skb_put(new_skb, skb->len); + memcpy(new_skb->data, skb->data, skb->len); + kfree_skb(skb); + skb = new_skb; + } + np->tx_skb_ring[i] = skb; - np->net_ring->tx_ring[i].addr = (unsigned long)skb->data; + np->net_ring->tx_ring[i].addr + = (unsigned long)phys_to_machine(virt_to_phys(skb->data)); np->net_ring->tx_ring[i].size = skb->len; np->net_ring->tx_prod = TX_RING_INC(i); atomic_inc(&np->tx_entries); @@ -273,13 +331,30 @@ static void network_rx_int(int irq, void *dev_id, struct pt_regs *ptregs) struct net_private *np = dev->priv; struct sk_buff *skb; + /*if (net_countx++ % 100 == 0) validate_free_list();*/ + again: for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) ) { + if (np->net_ring->rx_ring[i].status != RING_STATUS_OK) + { + printk("bad buffer on RX ring!(%d)\n", + np->net_ring->rx_ring[i].status); + continue; + } skb = np->rx_skb_ring[i]; + +//printk(KERN_ALERT "[%u]: ptmm[%lx] old:(%lx) new:(%lx)\n", i , virt_to_phys(skb->head) >> PAGE_SHIFT, phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT], (*(unsigned long *)phys_to_virt(machine_to_phys(np->net_ring->rx_ring[i].addr))) >> PAGE_SHIFT); + + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = + (*(unsigned long *)phys_to_virt( + machine_to_phys(np->net_ring->rx_ring[i].addr)) + ) >> PAGE_SHIFT; + skb_put(skb, np->net_ring->rx_ring[i].size); skb->protocol = eth_type_trans(skb, dev); np->stats.rx_packets++; + np->stats.rx_bytes += np->net_ring->rx_ring[i].size; netif_rx(skb); dev->last_rx = jiffies; @@ -309,8 +384,8 @@ int network_close(struct net_device *dev) struct net_private *np = dev->priv; netif_stop_queue(dev); - free_irq(NET_RX_IRQ_FOR_VIF(np->id), dev); - free_irq(NET_TX_IRQ_FOR_VIF(np->id), dev); + free_irq(NET_RX_IRQ, dev); + free_irq(NET_TX_IRQ, dev); network_free_rx_buffers(dev); kfree(np->net_ring->rx_ring); kfree(np->net_ring->tx_ring); diff --git a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h index 250b64fac8..1afc7e4d68 100644 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h @@ -2,7 +2,7 @@ #define _ASM_IO_H #include <linux/config.h> - +#include <asm/hypervisor.h> /* * This file contains the definitions for the x86 IO instructions * inb/inw/inl/outb/outw/outl and the "string versions" of the same @@ -74,6 +74,11 @@ static inline void * phys_to_virt(unsigned long address) } /* + * Change virtual addresses to machine addresses and vv. + * These are equally trivial. + */ + +/* * Change "struct page" to physical address. */ #define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) diff --git a/xenolinux-2.4.16-sparse/include/linux/skbuff.h b/xenolinux-2.4.16-sparse/include/linux/skbuff.h new file mode 100644 index 0000000000..fb2e5f15d8 --- /dev/null +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h @@ -0,0 +1,1186 @@ +/* + * Definitions for the 'struct sk_buff' memory handlers. + * + * Authors: + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_SKBUFF_H +#define _LINUX_SKBUFF_H + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/cache.h> + +#include <asm/atomic.h> +#include <asm/types.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +/* Zero Copy additions: + * + * (1) there are now two types of skb, as indicated by the skb_type field. + * this is because, at least for the time being, there are two seperate types + * of memory that may be allocated to skb->data. + * + * (2) until discontiguous memory is fully supported, there will be a free list of pages + * to be used by the net RX code. This list will be allocated in the driver init code + * but is declared here because the socket free code needs to return pages to it. + */ + +// for skb->skb_type: + +#define SKB_NORMAL 0 +#define SKB_ZERO_COPY 1 + +#define NUM_NET_PAGES 9 // about 1Meg of buffers. (2^9) + +/*struct net_page_info { + struct list_head list; + unsigned long virt_addr; + unsigned long ppte; +}; + +extern char *net_page_chunk; +extern struct net_page_info *net_page_table; +extern struct list_head net_page_list; +extern spinlock_t net_page_list_lock; +extern unsigned int net_pages; +*/ +/* End zero copy additions */ + +#define HAVE_ALLOC_SKB /* For the drivers to know */ +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ +#define SLAB_SKB /* Slabified skbuffs */ + +#define CHECKSUM_NONE 0 +#define CHECKSUM_HW 1 +#define CHECKSUM_UNNECESSARY 2 + +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) + +/* A. Checksumming of received packets by device. + * + * NONE: device failed to checksum this packet. + * skb->csum is undefined. + * + * UNNECESSARY: device parsed packet and wouldbe verified checksum. + * skb->csum is undefined. + * It is bad option, but, unfortunately, many of vendors do this. + * Apparently with secret goal to sell you new device, when you + * will add new protocol to your host. F.e. IPv6. 8) + * + * HW: the most generic way. Device supplied checksum of _all_ + * the packet as seen by netif_rx in skb->csum. + * NOTE: Even if device supports only some protocols, but + * is able to produce some skb->csum, it MUST use HW, + * not UNNECESSARY. + * + * B. Checksumming on output. + * + * NONE: skb is checksummed by protocol or csum is not required. + * + * HW: device is required to csum packet as seen by hard_start_xmit + * from skb->h.raw to the end and to record the checksum + * at skb->h.raw+skb->csum. + * + * Device must show its capabilities in dev->features, set + * at device setup time. + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum + * everything. + * NETIF_F_NO_CSUM - loopback or reliable single hop media. + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only + * TCP/UDP over IPv4. Sigh. Vendors like this + * way by an unknown reason. Though, see comment above + * about CHECKSUM_UNNECESSARY. 8) + * + * Any questions? No questions, good. --ANK + */ + +#ifdef __i386__ +#define NET_CALLER(arg) (*(((void**)&arg)-1)) +#else +#define NET_CALLER(arg) __builtin_return_address(0) +#endif + +#ifdef CONFIG_NETFILTER +struct nf_conntrack { + atomic_t use; + void (*destroy)(struct nf_conntrack *); +}; + +struct nf_ct_info { + struct nf_conntrack *master; +}; +#endif + +struct sk_buff_head { + /* These two members must be first. */ + struct sk_buff * next; + struct sk_buff * prev; + + __u32 qlen; + spinlock_t lock; +}; + +struct sk_buff; + +#define MAX_SKB_FRAGS 6 + +typedef struct skb_frag_struct skb_frag_t; + +struct skb_frag_struct +{ + struct page *page; + __u16 page_offset; + __u16 size; +}; + +/* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +struct skb_shared_info { + atomic_t dataref; + unsigned int nr_frags; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct sk_buff { + /* These two members must be first. */ + struct sk_buff * next; /* Next buffer in list */ + struct sk_buff * prev; /* Previous buffer in list */ + + struct sk_buff_head * list; /* List we are on */ + struct sock *sk; /* Socket we are owned by */ + struct timeval stamp; /* Time we arrived */ + struct net_device *dev; /* Device we arrived on/are leaving by */ + + /* Transport layer header */ + union + { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + struct spxhdr *spxh; + unsigned char *raw; + } h; + + /* Network layer header */ + union + { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arphdr *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; + + /* Link layer header */ + union + { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + + struct dst_entry *dst; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48]; + + unsigned int len; /* Length of actual data */ + unsigned int data_len; + unsigned int csum; /* Checksum */ + unsigned char __unused, /* Dead field, may be reused */ + cloned, /* head may be cloned (check refcnt to be sure). */ + pkt_type, /* Packet class */ + ip_summed; /* Driver fed us an IP checksum */ + __u32 priority; /* Packet queueing priority */ + atomic_t users; /* User count - see datagram.c,tcp.c */ + unsigned short protocol; /* Packet protocol from driver. */ + unsigned short security; /* Security level of packet */ + unsigned int truesize; /* Buffer size */ + + unsigned char *head; /* Head of buffer */ + unsigned char *data; /* Data head pointer */ + unsigned char *tail; /* Tail pointer */ + unsigned char *end; /* End pointer */ + + void (*destructor)(struct sk_buff *); /* Destruct function */ +#ifdef CONFIG_NETFILTER + /* Can be used for communication between hooks. */ + unsigned long nfmark; + /* Cache info */ + __u32 nfcache; + /* Associated connection, if any */ + struct nf_ct_info *nfct; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int nf_debug; +#endif +#endif /*CONFIG_NETFILTER*/ + +#if defined(CONFIG_HIPPI) + union{ + __u32 ifield; + } private; +#endif + +#ifdef CONFIG_NET_SCHED + __u32 tc_index; /* traffic control index */ +#endif + unsigned int skb_type; /* for zero copy handling. */ + struct net_page_info *net_page; +}; + +#define SK_WMEM_MAX 65535 +#define SK_RMEM_MAX 65535 + +#ifdef __KERNEL__ +/* + * Handling routines are only of interest to the kernel + */ +#include <linux/slab.h> + +#include <asm/system.h> + +extern void __kfree_skb(struct sk_buff *skb); +extern struct sk_buff * alloc_skb(unsigned int size, int priority); +extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); +extern void kfree_skbmem(struct sk_buff *skb); +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, + int newheadroom, + int newtailroom, + int priority); +#define dev_kfree_skb(a) kfree_skb(a) +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); + +/* Internal */ +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) + +/** + * skb_queue_empty - check if a queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + */ + +static inline int skb_queue_empty(struct sk_buff_head *list) +{ + return (list->next == (struct sk_buff *) list); +} + +/** + * skb_get - reference buffer + * @skb: buffer to reference + * + * Makes another reference to a socket buffer and returns a pointer + * to the buffer. + */ + +static inline struct sk_buff *skb_get(struct sk_buff *skb) +{ + atomic_inc(&skb->users); + return skb; +} + +/* + * If users==1, we are the only owner and are can avoid redundant + * atomic change. + */ + +/** + * kfree_skb - free an sk_buff + * @skb: buffer to free + * + * Drop a reference to the buffer and free it if the usage count has + * hit zero. + */ + +static inline void kfree_skb(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + __kfree_skb(skb); +} + +/* Use this if you didn't touch the skb state [for fast switching] */ +static inline void kfree_skb_fast(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + kfree_skbmem(skb); +} + +/** + * skb_cloned - is the buffer a clone + * @skb: buffer to check + * + * Returns true if the buffer was generated with skb_clone() and is + * one of multiple shared copies of the buffer. Cloned buffers are + * shared data so must not be written to under normal circumstances. + */ + +static inline int skb_cloned(struct sk_buff *skb) +{ + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; +} + +/** + * skb_shared - is the buffer shared + * @skb: buffer to check + * + * Returns true if more than one person has a reference to this + * buffer. + */ + +static inline int skb_shared(struct sk_buff *skb) +{ + return (atomic_read(&skb->users) != 1); +} + +/** + * skb_share_check - check if buffer is shared and if so clone it + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the buffer is shared the buffer is cloned and the old copy + * drops a reference. A new clone with a single reference is returned. + * If the buffer is not shared the original buffer is returned. When + * being called from interrupt status or with spinlocks held pri must + * be GFP_ATOMIC. + * + * NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) +{ + if (skb_shared(skb)) { + struct sk_buff *nskb; + nskb = skb_clone(skb, pri); + kfree_skb(skb); + return nskb; + } + return skb; +} + + +/* + * Copy shared buffers into a new sk_buff. We effectively do COW on + * packets to handle cases where we have a local reader and forward + * and a couple of other messy ones. The normal one is tcpdumping + * a packet thats being forwarded. + */ + +/** + * skb_unshare - make a copy of a shared buffer + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the socket buffer is a clone then this function creates a new + * copy of the data, drops a reference count on the old copy and returns + * the new copy with the reference count at 1. If the buffer is not a clone + * the original buffer is returned. When called with a spinlock held or + * from interrupt state @pri must be %GFP_ATOMIC + * + * %NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) +{ + struct sk_buff *nskb; + if(!skb_cloned(skb)) + return skb; + nskb=skb_copy(skb, pri); + kfree_skb(skb); /* Free our shared copy */ + return nskb; +} + +/** + * skb_peek + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the head element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->next; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_peek_tail + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the tail element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->prev; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_queue_len - get queue length + * @list_: list to measure + * + * Return the length of an &sk_buff queue. + */ + +static inline __u32 skb_queue_len(struct sk_buff_head *list_) +{ + return(list_->qlen); +} + +static inline void skb_queue_head_init(struct sk_buff_head *list) +{ + spin_lock_init(&list->lock); + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; + list->qlen = 0; +} + +/* + * Insert an sk_buff at the start of a list. + * + * The "__skb_xxxx()" functions are the non-atomic ones that + * can only be called with interrupts disabled. + */ + +/** + * __skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + prev = (struct sk_buff *)list; + next = prev->next; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + + +/** + * skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_head(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the end of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + next = (struct sk_buff *)list; + prev = next->prev; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + +/** + * skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the tail of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_tail(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. This function does not take any locks + * so must be used with appropriate locks held only. The head item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) +{ + struct sk_buff *next, *prev, *result; + + prev = (struct sk_buff *) list; + next = prev->next; + result = NULL; + if (next != prev) { + result = next; + next = next->next; + list->qlen--; + next->prev = prev; + prev->next = next; + result->next = NULL; + result->prev = NULL; + result->list = NULL; + } + return result; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The head item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) +{ + long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +/* + * Insert a packet on a list. + */ + +static inline void __skb_insert(struct sk_buff *newsk, + struct sk_buff * prev, struct sk_buff *next, + struct sk_buff_head * list) +{ + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; + newsk->list = list; + list->qlen++; +} + +/** + * skb_insert - insert a buffer + * @old: buffer to insert before + * @newsk: buffer to insert + * + * Place a packet before a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_insert(newsk, old->prev, old, old->list); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * Place a packet after a given packet in a list. + */ + +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + __skb_insert(newsk, old, old->next, old->list); +} + +/** + * skb_append - append a buffer + * @old: buffer to insert after + * @newsk: buffer to insert + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls. + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_append(old, newsk); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * remove sk_buff from list. _Must_ be called atomically, and with + * the list known.. + */ + +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) +{ + struct sk_buff * next, * prev; + + list->qlen--; + next = skb->next; + prev = skb->prev; + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + next->prev = prev; + prev->next = next; +} + +/** + * skb_unlink - remove a buffer from a list + * @skb: buffer to remove + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * + * Works even without knowing the list it is sitting on, which can be + * handy at times. It also means that THE LIST MUST EXIST when you + * unlink. Thus a list must have its contents unlinked before it is + * destroyed. + */ + +static inline void skb_unlink(struct sk_buff *skb) +{ + struct sk_buff_head *list = skb->list; + + if(list) { + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + if(skb->list == list) + __skb_unlink(skb, skb->list); + spin_unlock_irqrestore(&list->lock, flags); + } +} + +/* XXX: more streamlined implementation */ + +/** + * __skb_dequeue_tail - remove from the tail of the queue + * @list: list to dequeue from + * + * Remove the tail of the list. This function does not take any locks + * so must be used with appropriate locks held only. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) +{ + struct sk_buff *skb = skb_peek_tail(list); + if (skb) + __skb_unlink(skb, list); + return skb; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) +{ + long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue_tail(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +static inline int skb_is_nonlinear(const struct sk_buff *skb) +{ + return skb->data_len; +} + +static inline int skb_headlen(const struct sk_buff *skb) +{ + return skb->len - skb->data_len; +} + +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0) +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0) +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0) + +/* + * Add data to an sk_buff + */ + +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + return tmp; +} + +/** + * skb_put - add data to a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer. If this would + * exceed the total buffer size the kernel will panic. A pointer to the + * first byte of the extra data is returned. + */ + +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + if(skb->tail>skb->end) { + skb_over_panic(skb, len, current_text_addr()); + } + return tmp; +} + +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + return skb->data; +} + +/** + * skb_push - add data to the start of a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer at the buffer + * start. If this would exceed the total buffer headroom the kernel will + * panic. A pointer to the first byte of the extra data is returned. + */ + +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + if(skb->data<skb->head) { + skb_under_panic(skb, len, current_text_addr()); + } + return skb->data; +} + +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) +{ + skb->len-=len; + if (skb->len < skb->data_len) + BUG(); + return skb->data+=len; +} + +/** + * skb_pull - remove data from the start of a buffer + * @skb: buffer to use + * @len: amount of data to remove + * + * This function removes data from the start of a buffer, returning + * the memory to the headroom. A pointer to the next data in the buffer + * is returned. Once the data has been pulled future pushes will overwrite + * the old data. + */ + +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __skb_pull(skb,len); +} + +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); + +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb_headlen(skb) && + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) + return NULL; + skb->len -= len; + return skb->data += len; +} + +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __pskb_pull(skb,len); +} + +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +{ + if (len <= skb_headlen(skb)) + return 1; + if (len > skb->len) + return 0; + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); +} + +/** + * skb_headroom - bytes at buffer head + * @skb: buffer to check + * + * Return the number of bytes of free space at the head of an &sk_buff. + */ + +static inline int skb_headroom(const struct sk_buff *skb) +{ + return skb->data-skb->head; +} + +/** + * skb_tailroom - bytes at buffer end + * @skb: buffer to check + * + * Return the number of bytes of free space at the tail of an sk_buff + */ + +static inline int skb_tailroom(const struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; +} + +/** + * skb_reserve - adjust headroom + * @skb: buffer to alter + * @len: bytes to move + * + * Increase the headroom of an empty &sk_buff by reducing the tail + * room. This is only allowed for an empty buffer. + */ + +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) +{ + skb->data+=len; + skb->tail+=len; +} + +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); + +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + } else { + ___pskb_trim(skb, len, 0); + } +} + +/** + * skb_trim - remove end from a buffer + * @skb: buffer to alter + * @len: new length + * + * Cut the length of a buffer down by removing data from the tail. If + * the buffer is already under the length specified it is not modified. + */ + +static inline void skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (skb->len > len) { + __skb_trim(skb, len); + } +} + + +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + return 0; + } else { + return ___pskb_trim(skb, len, 1); + } +} + +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (len < skb->len) + return __pskb_trim(skb, len); + return 0; +} + +/** + * skb_orphan - orphan a buffer + * @skb: buffer to orphan + * + * If a buffer currently has an owner then we call the owner's + * destructor function and make the @skb unowned. The buffer continues + * to exist but is no longer charged to its former owner. + */ + + +static inline void skb_orphan(struct sk_buff *skb) +{ + if (skb->destructor) + skb->destructor(skb); + skb->destructor = NULL; + skb->sk = NULL; +} + +/** + * skb_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function takes the list + * lock and is atomic with respect to other list locking functions. + */ + + +static inline void skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __skb_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function does not take the + * list lock and the caller must hold the relevant locks to use it. + */ + + +static inline void __skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=__skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned in there is no free memory. + */ + +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, + int gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(length+16, gfp_mask); + //skb = alloc_zc_skb(length+16, gfp_mask); + if (skb) + skb_reserve(skb,16); + return skb; +} + +/** + * dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned in there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ + +static inline struct sk_buff *dev_alloc_skb(unsigned int length) +{ + return __dev_alloc_skb(length, GFP_ATOMIC); +} + +/** + * skb_cow - copy header of skb when it is required + * @skb: buffer to cow + * @headroom: needed headroom + * + * If the skb passed lacks sufficient headroom or its data part + * is shared, data is reallocated. If reallocation fails, an error + * is returned and original skb is not changed. + * + * The result is skb with writable area skb->head...skb->tail + * and at least @headroom of space at head. + */ + +static inline int +skb_cow(struct sk_buff *skb, unsigned int headroom) +{ + int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + + if (delta < 0) + delta = 0; + + if (delta || skb_cloned(skb)) + return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); + return 0; +} + +/** + * skb_linearize - convert paged skb to linear one + * @skb: buffer to linarize + * @gfp: allocation mode + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. */ +int skb_linearize(struct sk_buff *skb, int gfp); + +static inline void *kmap_skb_frag(const skb_frag_t *frag) +{ +#ifdef CONFIG_HIGHMEM + if (in_irq()) + BUG(); + + local_bh_disable(); +#endif + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); +} + +static inline void kunmap_skb_frag(void *vaddr) +{ + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); +#ifdef CONFIG_HIGHMEM + local_bh_enable(); +#endif +} + +#define skb_queue_walk(queue, skb) \ + for (skb = (queue)->next; \ + (skb != (struct sk_buff *)(queue)); \ + skb=skb->next) + + +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); + +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); + +extern void skb_init(void); +extern void skb_add_mtu(int mtu); + +#ifdef CONFIG_NETFILTER +static inline void +nf_conntrack_put(struct nf_ct_info *nfct) +{ + if (nfct && atomic_dec_and_test(&nfct->master->use)) + nfct->master->destroy(nfct->master); +} +static inline void +nf_conntrack_get(struct nf_ct_info *nfct) +{ + if (nfct) + atomic_inc(&nfct->master->use); +} +#endif + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SKBUFF_H */ diff --git a/xenolinux-2.4.16-sparse/net/core/skbuff.c b/xenolinux-2.4.16-sparse/net/core/skbuff.c new file mode 100644 index 0000000000..45332f1ffd --- /dev/null +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c @@ -0,0 +1,1373 @@ +/* + * Routines having to do with the 'struct sk_buff' memory handlers. + * + * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> + * Florian La Roche <rzsfl@rz.uni-sb.de> + * + * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $ + * + * Fixes: + * Alan Cox : Fixed the worst of the load balancer bugs. + * Dave Platt : Interrupt stacking fix. + * Richard Kooijman : Timestamp fixes. + * Alan Cox : Changed buffer format. + * Alan Cox : destructor hook for AF_UNIX etc. + * Linus Torvalds : Better skb_clone. + * Alan Cox : Added skb_copy. + * Alan Cox : Added all the changed routines Linus + * only put in the headers + * Ray VanTassle : Fixed --skb->lock in free + * Alan Cox : skb_copy copy arp field + * Andi Kleen : slabified it. + * + * NOTE: + * The __skb_ routines should be called with interrupts + * disabled, or you better be *real* sure that the operation is atomic + * with respect to whatever list is being frobbed (e.g. via lock_sock() + * or via disabling bottom half handlers, etc). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * The functions in this file will not compile correctly with gcc 2.4.x + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/in.h> +#include <linux/inet.h> +#include <linux/slab.h> +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/skbuff.h> +#include <linux/cache.h> +#include <linux/init.h> +#include <linux/highmem.h> +#include <linux/spinlock.h> + +#include <net/ip.h> +#include <net/protocol.h> +#include <net/dst.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/sock.h> +#include <asm/io.h> +#include <asm/uaccess.h> +#include <asm/system.h> + +/* zc globals: */ +/* +char *net_page_chunk; +struct net_page_info *net_page_table; +struct list_head net_page_list; +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED; +unsigned int net_pages; +*/ + + +int sysctl_hot_list_len = 128; + +static kmem_cache_t *skbuff_head_cache; + +static union { + struct sk_buff_head list; + char pad[SMP_CACHE_BYTES]; +} skb_head_pool[NR_CPUS]; + +/* + * Keep out-of-line to prevent kernel bloat. + * __builtin_return_address is not used because it is not always + * reliable. + */ + +/** + * skb_over_panic - private function + * @skb: buffer + * @sz: size + * @here: address + * + * Out of line support code for skb_put(). Not user callable. + */ + +void skb_over_panic(struct sk_buff *skb, int sz, void *here) +{ + printk("skput:over: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); + BUG(); +} + +/** + * skb_under_panic - private function + * @skb: buffer + * @sz: size + * @here: address + * + * Out of line support code for skb_push(). Not user callable. + */ + + +void skb_under_panic(struct sk_buff *skb, int sz, void *here) +{ + printk("skput:under: %p:%d put:%d dev:%s", + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); + BUG(); +} + +static __inline__ struct sk_buff *skb_head_from_pool(void) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list)) { + struct sk_buff *skb; + unsigned long flags; + + local_irq_save(flags); + skb = __skb_dequeue(list); + local_irq_restore(flags); + return skb; + } + return NULL; +} + +static __inline__ void skb_head_to_pool(struct sk_buff *skb) +{ + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; + + if (skb_queue_len(list) < sysctl_hot_list_len) { + unsigned long flags; + + local_irq_save(flags); + __skb_queue_head(list, skb); + local_irq_restore(flags); + + return; + } + kmem_cache_free(skbuff_head_cache, skb); +} + + +/* Allocate a new skbuff. We do this ourselves so we can fill in a few + * 'private' fields and also do memory statistics to find all the + * [BEEP] leaks. + * + */ + +/** + * alloc_skb - allocate a network buffer + * @size: size to allocate + * @gfp_mask: allocation mask + * + * Allocate a new &sk_buff. The returned buffer has no headroom and a + * tail room of size bytes. The object has a reference count of one. + * The return is the buffer. On a failure the return is %NULL. + * + * Buffers may only be allocated from interrupts using a @gfp_mask of + * %GFP_ATOMIC. + */ + +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { + static int count = 0; + if (++count < 5) { + printk(KERN_ERR "alloc_skb called nonatomically " + "from interrupt %p\n", NET_CALLER(size)); + BUG(); + } + gfp_mask &= ~__GFP_WAIT; + } + + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + goto nohead; + } + + /* Get the DATA. Size must match skb_add_mtu(). */ + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + goto nodata; + + /* XXX: does not include slab overhead */ + skb->truesize = size + sizeof(struct sk_buff); + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->cloned = 0; + skb->data_len = 0; + skb->skb_type = SKB_NORMAL; + + atomic_set(&skb->users, 1); + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + return skb; + +nodata: + skb_head_to_pool(skb); +nohead: + return NULL; +} + +/* begin zc code additions: */ +/* +void init_net_pages(unsigned long order_pages) +{ + int i; + struct net_page_info *np; + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; + unsigned long nr_pages = 1 << order_pages; + + net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages); + net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL); + + INIT_LIST_HEAD(&net_page_list); + + for (i = 0; i < nr_pages; i++) + { + np = net_page_table + i; + np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE); + + // now fill the pte pointer: + //np->ppte = 0xdeadbeef; + //pgd = pgd_offset_k(np->virt_addr); + //if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG(); + + //if (pmd_none(*pmd)) BUG(); + //if (pmd_bad(*pmd)) BUG(); + + //ptep = pte_offset(pmd, np->virt_addr); + //np->ppte = phys_to_machine(virt_to_phys(ptep)); + + list_add_tail(&np->list, &net_page_list); + } + net_pages = nr_pages; + + +} + +struct net_page_info *get_net_page(void) +{ + + struct list_head *list_ptr; + struct net_page_info *np; + unsigned long flags; + + if (!net_pages) + { + return NULL; + } + spin_lock_irqsave(&net_page_list_lock, flags); + + list_ptr = net_page_list.next; + np = list_entry(list_ptr, struct net_page_info, list); + list_del(&np->list); + net_pages--; + + spin_unlock_irqrestore(&net_page_list_lock, flags); + + return np; +} + +void free_net_page(struct net_page_info *np) +{ + unsigned long flags; + + if (np == NULL) return; + + spin_lock_irqsave(&net_page_list_lock, flags); + + list_add(&np->list, &net_page_list); + net_pages++; + + spin_unlock_irqrestore(&net_page_list_lock, flags); + +} +*/ +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) +{ + struct sk_buff *skb; + u8 *data; + + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { + static int count = 0; + if (++count < 5) { + printk(KERN_ERR "alloc_skb called nonatomically " + "from interrupt %p\n", NET_CALLER(size)); + BUG(); + } + gfp_mask &= ~__GFP_WAIT; + } + + /* Get the HEAD */ + skb = skb_head_from_pool(); + if (skb == NULL) { + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); + if (skb == NULL) + goto nohead; + } + + /* Get the DATA. Size must match skb_add_mtu(). */ + size = SKB_DATA_ALIGN(size); + if (size > PAGE_SIZE) + { + printk("alloc_zc_skb called with unruly size.\n"); + size = PAGE_SIZE; + } + /*skb->net_page = get_net_page(); + if (skb->net_page == NULL) + { + goto nodata; + } + data = (u8 *)skb->net_page->virt_addr;*/ + data = (char *)__get_free_page(gfp_mask); + if (data == NULL) + goto nodata; + /* XXX: does not include slab overhead */ + skb->truesize = size + sizeof(struct sk_buff); + + /* Load the data pointers. */ + skb->head = data; + skb->data = data; + skb->tail = data; + skb->end = data + size; + + /* Set up other state */ + skb->len = 0; + skb->cloned = 0; + skb->data_len = 0; + skb->skb_type = SKB_ZERO_COPY; + + atomic_set(&skb->users, 1); + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + return skb; + +nodata: + skb_head_to_pool(skb); +nohead: + return NULL; +} + +/* end zc code additions: */ + +/* + * Slab constructor for a skb head. + */ +static inline void skb_headerinit(void *p, kmem_cache_t *cache, + unsigned long flags) +{ + struct sk_buff *skb = p; + + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + skb->sk = NULL; + skb->stamp.tv_sec=0; /* No idea about time */ + skb->dev = NULL; + skb->dst = NULL; + memset(skb->cb, 0, sizeof(skb->cb)); + skb->pkt_type = PACKET_HOST; /* Default type */ + skb->ip_summed = 0; + skb->priority = 0; + skb->security = 0; /* By default packets are insecure */ + skb->destructor = NULL; + +#ifdef CONFIG_NETFILTER + skb->nfmark = skb->nfcache = 0; + skb->nfct = NULL; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif +#endif +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif +} + +static void skb_drop_fraglist(struct sk_buff *skb) +{ + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + skb_shinfo(skb)->frag_list = NULL; + + do { + struct sk_buff *this = list; + list = list->next; + kfree_skb(this); + } while (list); +} + +static void skb_clone_fraglist(struct sk_buff *skb) +{ + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) + skb_get(list); +} + +static void skb_release_data(struct sk_buff *skb) +{ + if (!skb->cloned || + atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { + if (skb_shinfo(skb)->nr_frags) { + int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) +{ + put_page(skb_shinfo(skb)->frags[i].page); +} + } + + if (skb_shinfo(skb)->frag_list) + skb_drop_fraglist(skb); + + if (skb->skb_type == SKB_NORMAL) + { + kfree(skb->head); + } else {// SKB_ZERO_COPY + //free_net_page(skb->net_page); +//printk(KERN_ALERT "<%p>\n", phys_to_machine(virt_to_phys(skb->head))); + free_page((void *)skb->head); + } + } + +} + +/* + * Free an skbuff by memory without cleaning the state. + */ +void kfree_skbmem(struct sk_buff *skb) +{ + skb_release_data(skb); + skb_head_to_pool(skb); +} + +/** + * __kfree_skb - private function + * @skb: buffer + * + * Free an sk_buff. Release anything attached to the buffer. + * Clean the state. This is an internal helper function. Users should + * always call kfree_skb + */ + +void __kfree_skb(struct sk_buff *skb) +{ + if (skb->list) { + printk(KERN_WARNING "Warning: kfree_skb passed an skb still " + "on a list (from %p).\n", NET_CALLER(skb)); + BUG(); + } + + dst_release(skb->dst); + if(skb->destructor) { + if (in_irq()) { + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", + NET_CALLER(skb)); + } + skb->destructor(skb); + } +#ifdef CONFIG_NETFILTER + nf_conntrack_put(skb->nfct); +#endif + skb_headerinit(skb, NULL, 0); /* clean state */ + kfree_skbmem(skb); +} + +/** + * skb_clone - duplicate an sk_buff + * @skb: buffer to clone + * @gfp_mask: allocation priority + * + * Duplicate an &sk_buff. The new one is not owned by a socket. Both + * copies share the same packet data but not structure. The new + * buffer has a reference count of 1. If the allocation fails the + * function returns %NULL otherwise the new buffer is returned. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + */ + +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + + n = skb_head_from_pool(); + if (!n) { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + } + +#define C(x) n->x = skb->x + + n->next = n->prev = NULL; + n->list = NULL; + n->sk = NULL; + C(stamp); + C(dev); + C(h); + C(nh); + C(mac); + C(dst); + dst_clone(n->dst); + memcpy(n->cb, skb->cb, sizeof(skb->cb)); + C(len); + C(data_len); + C(csum); + n->cloned = 1; + C(pkt_type); + C(ip_summed); + C(priority); + atomic_set(&n->users, 1); + C(protocol); + C(security); + C(truesize); + C(head); + C(data); + C(tail); + C(end); + n->destructor = NULL; +#ifdef CONFIG_NETFILTER + C(nfmark); + C(nfcache); + C(nfct); +#ifdef CONFIG_NETFILTER_DEBUG + C(nf_debug); +#endif +#endif /*CONFIG_NETFILTER*/ +#if defined(CONFIG_HIPPI) + C(private); +#endif +#ifdef CONFIG_NET_SCHED + C(tc_index); +#endif + C(skb_type); + //C(net_page); + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; +#ifdef CONFIG_NETFILTER + nf_conntrack_get(skb->nfct); +#endif + return n; +} + +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) +{ + /* + * Shift between the two data areas in bytes + */ + unsigned long offset = new->data - old->data; + + new->list=NULL; + new->sk=NULL; + new->dev=old->dev; + new->priority=old->priority; + new->protocol=old->protocol; + new->dst=dst_clone(old->dst); + new->h.raw=old->h.raw+offset; + new->nh.raw=old->nh.raw+offset; + new->mac.raw=old->mac.raw+offset; + memcpy(new->cb, old->cb, sizeof(old->cb)); + atomic_set(&new->users, 1); + new->pkt_type=old->pkt_type; + new->stamp=old->stamp; + new->destructor = NULL; + new->security=old->security; +#ifdef CONFIG_NETFILTER + new->nfmark=old->nfmark; + new->nfcache=old->nfcache; + new->nfct=old->nfct; + nf_conntrack_get(new->nfct); +#ifdef CONFIG_NETFILTER_DEBUG + new->nf_debug=old->nf_debug; +#endif +#endif +#ifdef CONFIG_NET_SCHED + new->tc_index = old->tc_index; +#endif +} + +/** + * skb_copy - create private copy of an sk_buff + * @skb: buffer to copy + * @gfp_mask: allocation priority + * + * Make a copy of both an &sk_buff and its data. This is used when the + * caller wishes to modify the data and needs a private copy of the + * data to alter. Returns %NULL on failure or the pointer to the buffer + * on success. The returned buffer has a reference count of 1. + * + * As by-product this function converts non-linear &sk_buff to linear + * one, so that &sk_buff becomes completely private and caller is allowed + * to modify all the data of returned buffer. This means that this + * function is not recommended for use in circumstances when only + * header is going to be modified. Use pskb_copy() instead. + */ + +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + int headerlen = skb->data-skb->head; + + /* + * Allocate the copy buffer + */ + n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); + if(n==NULL) + return NULL; + + /* Set the data pointer */ + skb_reserve(n,headerlen); + /* Set the tail pointer and length */ + skb_put(n,skb->len); + n->csum = skb->csum; + n->ip_summed = skb->ip_summed; + + if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) + BUG(); + + copy_skb_header(n, skb); + + return n; +} + +/* Keep head the same: replace data */ +int skb_linearize(struct sk_buff *skb, int gfp_mask) +{ + unsigned int size; + u8 *data; + long offset; + int headerlen = skb->data - skb->head; + int expand = (skb->tail+skb->data_len) - skb->end; + + if (skb_shared(skb)) + BUG(); + + if (expand <= 0) + expand = 0; + + size = (skb->end - skb->head + expand); + size = SKB_DATA_ALIGN(size); + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + return -ENOMEM; + + /* Copy entire thing */ + if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) + BUG(); + + /* Offset between the two in bytes */ + offset = data - skb->head; + + /* Free old data. */ + skb_release_data(skb); + + skb->head = data; + skb->end = data + size; + + /* Set up new pointers */ + skb->h.raw += offset; + skb->nh.raw += offset; + skb->mac.raw += offset; + skb->tail += offset; + skb->data += offset; + + /* Set up shinfo */ + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->nr_frags = 0; + skb_shinfo(skb)->frag_list = NULL; + + /* We are no longer a clone, even if we were. */ + skb->cloned = 0; + + skb->tail += skb->data_len; + skb->data_len = 0; + return 0; +} + + +/** + * pskb_copy - create copy of an sk_buff with private head. + * @skb: buffer to copy + * @gfp_mask: allocation priority + * + * Make a copy of both an &sk_buff and part of its data, located + * in header. Fragmented data remain shared. This is used when + * the caller wishes to modify only header of &sk_buff and needs + * private copy of the header to alter. Returns %NULL on failure + * or the pointer to the buffer on success. + * The returned buffer has a reference count of 1. + */ + +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) +{ + struct sk_buff *n; + + /* + * Allocate the copy buffer + */ + n=alloc_skb(skb->end - skb->head, gfp_mask); + if(n==NULL) + return NULL; + + /* Set the data pointer */ + skb_reserve(n,skb->data-skb->head); + /* Set the tail pointer and length */ + skb_put(n,skb_headlen(skb)); + /* Copy the bytes */ + memcpy(n->data, skb->data, n->len); + n->csum = skb->csum; + n->ip_summed = skb->ip_summed; + + n->data_len = skb->data_len; + n->len = skb->len; + + if (skb_shinfo(skb)->nr_frags) { + int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; + get_page(skb_shinfo(n)->frags[i].page); + } + skb_shinfo(n)->nr_frags = i; + } + + if (skb_shinfo(skb)->frag_list) { + skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; + skb_clone_fraglist(n); + } + + copy_skb_header(n, skb); + + return n; +} + +/** + * pskb_expand_head - reallocate header of &sk_buff + * @skb: buffer to reallocate + * @nhead: room to add at head + * @ntail: room to add at tail + * @gfp_mask: allocation priority + * + * Expands (or creates identical copy, if &nhead and &ntail are zero) + * header of skb. &sk_buff itself is not changed. &sk_buff MUST have + * reference count of 1. Returns zero in the case of success or error, + * if expansion failed. In the last case, &sk_buff is not changed. + * + * All the pointers pointing into skb header may change and must be + * reloaded after call to this function. + */ + +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) +{ + int i; + u8 *data; + int size = nhead + (skb->end - skb->head) + ntail; + long off; + + if (skb_shared(skb)) + BUG(); + + size = SKB_DATA_ALIGN(size); + + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); + if (data == NULL) + goto nodata; + + /* Copy only real data... and, alas, header. This should be + * optimized for the cases when header is void. */ + memcpy(data+nhead, skb->head, skb->tail-skb->head); + memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); + + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) + get_page(skb_shinfo(skb)->frags[i].page); + + if (skb_shinfo(skb)->frag_list) + skb_clone_fraglist(skb); + + skb_release_data(skb); + + off = (data+nhead) - skb->head; + + skb->head = data; + skb->end = data+size; + + skb->data += off; + skb->tail += off; + skb->mac.raw += off; + skb->h.raw += off; + skb->nh.raw += off; + skb->cloned = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); + return 0; + +nodata: + return -ENOMEM; +} + +/* Make private copy of skb with writable head and some headroom */ + +struct sk_buff * +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) +{ + struct sk_buff *skb2; + int delta = headroom - skb_headroom(skb); + + if (delta <= 0) + return pskb_copy(skb, GFP_ATOMIC); + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2 == NULL || + !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) + return skb2; + + kfree_skb(skb2); + return NULL; +} + + +/** + * skb_copy_expand - copy and expand sk_buff + * @skb: buffer to copy + * @newheadroom: new free bytes at head + * @newtailroom: new free bytes at tail + * @gfp_mask: allocation priority + * + * Make a copy of both an &sk_buff and its data and while doing so + * allocate additional space. + * + * This is used when the caller wishes to modify the data and needs a + * private copy of the data to alter as well as more space for new fields. + * Returns %NULL on failure or the pointer to the buffer + * on success. The returned buffer has a reference count of 1. + * + * You must pass %GFP_ATOMIC as the allocation priority if this function + * is called from an interrupt. + */ + + +struct sk_buff *skb_copy_expand(const struct sk_buff *skb, + int newheadroom, + int newtailroom, + int gfp_mask) +{ + struct sk_buff *n; + + /* + * Allocate the copy buffer + */ + + n=alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask); + if(n==NULL) + return NULL; + + skb_reserve(n,newheadroom); + + /* Set the tail pointer and length */ + skb_put(n,skb->len); + + /* Copy the data only. */ + if (skb_copy_bits(skb, 0, n->data, skb->len)) + BUG(); + + copy_skb_header(n, skb); + return n; +} + +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. + * If realloc==0 and trimming is impossible without change of data, + * it is BUG(). + */ + +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) +{ + int offset = skb_headlen(skb); + int nfrags = skb_shinfo(skb)->nr_frags; + int i; + + for (i=0; i<nfrags; i++) { + int end = offset + skb_shinfo(skb)->frags[i].size; + if (end > len) { + if (skb_cloned(skb)) { + if (!realloc) + BUG(); + if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + return -ENOMEM; + } + if (len <= offset) { + put_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->nr_frags--; + } else { + skb_shinfo(skb)->frags[i].size = len-offset; + } + } + offset = end; + } + + if (offset < len) { + skb->data_len -= skb->len - len; + skb->len = len; + } else { + if (len <= skb_headlen(skb)) { + skb->len = len; + skb->data_len = 0; + skb->tail = skb->data + len; + if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) + skb_drop_fraglist(skb); + } else { + skb->data_len -= skb->len - len; + skb->len = len; + } + } + + return 0; +} + +/** + * __pskb_pull_tail - advance tail of skb header + * @skb: buffer to reallocate + * @delta: number of bytes to advance tail + * + * The function makes a sense only on a fragmented &sk_buff, + * it expands header moving its tail forward and copying necessary + * data from fragmented part. + * + * &sk_buff MUST have reference count of 1. + * + * Returns %NULL (and &sk_buff does not change) if pull failed + * or value of new tail of skb in the case of success. + * + * All the pointers pointing into skb header may change and must be + * reloaded after call to this function. + */ + +/* Moves tail of skb head forward, copying data from fragmented part, + * when it is necessary. + * 1. It may fail due to malloc failure. + * 2. It may change skb pointers. + * + * It is pretty complicated. Luckily, it is called only in exceptional cases. + */ +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta) +{ + int i, k, eat; + + /* If skb has not enough free space at tail, get new one + * plus 128 bytes for future expansions. If we have enough + * room at tail, reallocate without expansion only if skb is cloned. + */ + eat = (skb->tail+delta) - skb->end; + + if (eat > 0 || skb_cloned(skb)) { + if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC)) + return NULL; + } + + if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) + BUG(); + + /* Optimization: no fragments, no reasons to preestimate + * size of pulled pages. Superb. + */ + if (skb_shinfo(skb)->frag_list == NULL) + goto pull_pages; + + /* Estimate size of pulled pages. */ + eat = delta; + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { + if (skb_shinfo(skb)->frags[i].size >= eat) + goto pull_pages; + eat -= skb_shinfo(skb)->frags[i].size; + } + + /* If we need update frag list, we are in troubles. + * Certainly, it possible to add an offset to skb data, + * but taking into account that pulling is expected to + * be very rare operation, it is worth to fight against + * further bloating skb head and crucify ourselves here instead. + * Pure masohism, indeed. 8)8) + */ + if (eat) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + struct sk_buff *clone = NULL; + struct sk_buff *insp = NULL; + + do { + if (list == NULL) + BUG(); + + if (list->len <= eat) { + /* Eaten as whole. */ + eat -= list->len; + list = list->next; + insp = list; + } else { + /* Eaten partially. */ + + if (skb_shared(list)) { + /* Sucks! We need to fork list. :-( */ + clone = skb_clone(list, GFP_ATOMIC); + if (clone == NULL) + return NULL; + insp = list->next; + list = clone; + } else { + /* This may be pulled without + * problems. */ + insp = list; + } + if (pskb_pull(list, eat) == NULL) { + if (clone) + kfree_skb(clone); + return NULL; + } + break; + } + } while (eat); + + /* Free pulled out fragments. */ + while ((list = skb_shinfo(skb)->frag_list) != insp) { + skb_shinfo(skb)->frag_list = list->next; + kfree_skb(list); + } + /* And insert new clone at head. */ + if (clone) { + clone->next = list; + skb_shinfo(skb)->frag_list = clone; + } + } + /* Success! Now we may commit changes to skb data. */ + +pull_pages: + eat = delta; + k = 0; + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { + if (skb_shinfo(skb)->frags[i].size <= eat) { + put_page(skb_shinfo(skb)->frags[i].page); + eat -= skb_shinfo(skb)->frags[i].size; + } else { + skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + if (eat) { + skb_shinfo(skb)->frags[k].page_offset += eat; + skb_shinfo(skb)->frags[k].size -= eat; + eat = 0; + } + k++; + } + } + skb_shinfo(skb)->nr_frags = k; + + skb->tail += delta; + skb->data_len -= delta; + + return skb->tail; +} + +/* Copy some data bits from skb to kernel buffer. */ + +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) +{ + int i, copy; + int start = skb->len - skb->data_len; + + if (offset > (int)skb->len-len) + goto fault; + + /* Copy header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + memcpy(to, skb->data + offset, copy); + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + u8 *vaddr; + + if (copy > len) + copy = len; + + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ + offset-start, copy); + kunmap_skb_frag(vaddr); + + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + if (copy > len) + copy = len; + if (skb_copy_bits(list, offset-start, to, copy)) + goto fault; + if ((len -= copy) == 0) + return 0; + offset += copy; + to += copy; + } + start = end; + } + } + if (len == 0) + return 0; + +fault: + return -EFAULT; +} + +/* Checksum skb data. */ + +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) +{ + int i, copy; + int start = skb->len - skb->data_len; + int pos = 0; + + /* Checksum header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + csum = csum_partial(skb->data+offset, copy, csum); + if ((len -= copy) == 0) + return csum; + offset += copy; + pos = copy; + } + + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + unsigned int csum2; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + vaddr = kmap_skb_frag(frag); + csum2 = csum_partial(vaddr + frag->page_offset + + offset-start, copy, 0); + kunmap_skb_frag(vaddr); + csum = csum_block_add(csum, csum2, pos); + if (!(len -= copy)) + return csum; + offset += copy; + pos += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + unsigned int csum2; + if (copy > len) + copy = len; + csum2 = skb_checksum(list, offset-start, copy, 0); + csum = csum_block_add(csum, csum2, pos); + if ((len -= copy) == 0) + return csum; + offset += copy; + pos += copy; + } + start = end; + } + } + if (len == 0) + return csum; + + BUG(); + return csum; +} + +/* Both of above in one bottle. */ + +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) +{ + int i, copy; + int start = skb->len - skb->data_len; + int pos = 0; + + /* Copy header. */ + if ((copy = start-offset) > 0) { + if (copy > len) + copy = len; + csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); + if ((len -= copy) == 0) + return csum; + offset += copy; + to += copy; + pos = copy; + } + + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { + int end; + + BUG_TRAP(start <= offset+len); + + end = start + skb_shinfo(skb)->frags[i].size; + if ((copy = end-offset) > 0) { + unsigned int csum2; + u8 *vaddr; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + if (copy > len) + copy = len; + vaddr = kmap_skb_frag(frag); + csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + + offset-start, to, copy, 0); + kunmap_skb_frag(vaddr); + csum = csum_block_add(csum, csum2, pos); + if (!(len -= copy)) + return csum; + offset += copy; + to += copy; + pos += copy; + } + start = end; + } + + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list; + + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { + unsigned int csum2; + int end; + + BUG_TRAP(start <= offset+len); + + end = start + list->len; + if ((copy = end-offset) > 0) { + if (copy > len) + copy = len; + csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); + csum = csum_block_add(csum, csum2, pos); + if ((len -= copy) == 0) + return csum; + offset += copy; + to += copy; + pos += copy; + } + start = end; + } + } + if (len == 0) + return csum; + + BUG(); + return csum; +} + +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) +{ + unsigned int csum; + long csstart; + + if (skb->ip_summed == CHECKSUM_HW) + csstart = skb->h.raw - skb->data; + else + csstart = skb->len - skb->data_len; + + if (csstart > skb->len - skb->data_len) + BUG(); + + memcpy(to, skb->data, csstart); + + csum = 0; + if (csstart != skb->len) + csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, + skb->len-csstart, 0); + + if (skb->ip_summed == CHECKSUM_HW) { + long csstuff = csstart + skb->csum; + + *((unsigned short *)(to + csstuff)) = csum_fold(csum); + } +} + +#if 0 +/* + * Tune the memory allocator for a new MTU size. + */ +void skb_add_mtu(int mtu) +{ + /* Must match allocation in alloc_skb */ + mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); + + kmem_add_cache_size(mtu); +} +#endif + +void __init skb_init(void) +{ + int i; + + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", + sizeof(struct sk_buff), + 0, + SLAB_HWCACHE_ALIGN, + skb_headerinit, NULL); + if (!skbuff_head_cache) + panic("cannot create skbuff cache"); + + //init_net_pages(NUM_NET_PAGES); + + for (i=0; i<NR_CPUS; i++) + skb_queue_head_init(&skb_head_pool[i].list); +} |