diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-05-12 15:54:24 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-05-12 15:54:24 +0000 |
commit | c9cbf814f77829b03cf273042c147b4ef754e939 (patch) | |
tree | ba86fe2ad548b575e5dd835aadbbd7ec09682599 | |
parent | 95e7814b6642863a49ad38d6a64b7eedde195b71 (diff) | |
download | xen-c9cbf814f77829b03cf273042c147b4ef754e939.tar.gz xen-c9cbf814f77829b03cf273042c147b4ef754e939.tar.bz2 xen-c9cbf814f77829b03cf273042c147b4ef754e939.zip |
bitkeeper revision 1.891.1.12 (40a248b0WTGoOa9206iWkyGN0mTPNw)
Allow forcing of IRQ trigger-type to edge or level
(NB. DANGEROUS!).
-rw-r--r-- | .rootkeys | 1 | ||||
-rw-r--r-- | xen/arch/i386/io_apic.c | 136 | ||||
-rw-r--r-- | xen/arch/i386/irq.c | 6 | ||||
-rw-r--r-- | xen/common/kernel.c | 44 | ||||
-rw-r--r-- | xen/common/physdev.c | 19 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/physdev.h | 62 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c | 6 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c | 21 | ||||
-rw-r--r-- | xenolinux-2.4.26-sparse/include/linux/skbuff.h | 1174 |
9 files changed, 1411 insertions, 58 deletions
@@ -753,6 +753,7 @@ 3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.26-sparse/include/linux/blk.h 3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.26-sparse/include/linux/major.h 401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.26-sparse/include/linux/sched.h +40a248afgI0_JKthdYAe8beVfXSTpQ xenolinux-2.4.26-sparse/include/linux/skbuff.h 3e5a4e686V0nioX2ZpFf056sgvdiQw xenolinux-2.4.26-sparse/include/linux/sunrpc/debug.h 401c0592pLrp_aCbQRo9GXiYQQaVVA xenolinux-2.4.26-sparse/include/linux/timer.h 3e5a4e68W_hpMlM3u_-QOKMp3gzcwQ xenolinux-2.4.26-sparse/init/do_mounts.c diff --git a/xen/arch/i386/io_apic.c b/xen/arch/i386/io_apic.c index 3f0c81be7a..7c307922b3 100644 --- a/xen/arch/i386/io_apic.c +++ b/xen/arch/i386/io_apic.c @@ -208,7 +208,11 @@ static void set_ioapic_affinity (unsigned int irq, unsigned long mask) spin_unlock_irqrestore(&ioapic_lock, flags); } -#if CONFIG_SMP +/* + * In new I/O model, the interrupt is pinned to the CPU of the first + * device-driver domain that attaches. Dynamic balancing is pointless. + */ +#if defined(CONFIG_SMP) && !defined(NO_DEVICES_IN_XEN) typedef struct { unsigned int cpu; @@ -220,8 +224,6 @@ static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned extern unsigned long irq_affinity [NR_IRQS]; -#endif - #define IDLE_ENOUGH(cpu,now) \ (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) @@ -256,7 +258,6 @@ inside: static inline void balance_irq(int irq) { -#if CONFIG_SMP irq_balance_t *entry = irq_balance + irq; unsigned long now = jiffies; @@ -272,9 +273,14 @@ static inline void balance_irq(int irq) entry->cpu = move(entry->cpu, allowed_mask, now, random_number); set_ioapic_affinity(irq, apicid_to_phys_cpu_present(entry->cpu)); } -#endif } +#else + +#define balance_irq(_irq) ((void)0) + +#endif + /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to * specific CPU-side IRQs. 
@@ -883,6 +889,7 @@ void __init UNEXPECTED_IO_APIC(void) void __init print_IO_APIC(void) { +#ifndef NDEBUG int apic, i; struct IO_APIC_reg_00 reg_00; struct IO_APIC_reg_01 reg_01; @@ -1019,10 +1026,12 @@ void __init print_IO_APIC(void) } printk(KERN_INFO ".................................... done.\n"); - - return; +#endif } + +#if 0 /* Maybe useful for debugging, but not currently used anywhere. */ + static void print_APIC_bitfield (int base) { unsigned int v; @@ -1041,6 +1050,7 @@ static void print_APIC_bitfield (int base) } } + void /*__init*/ print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; @@ -1156,6 +1166,9 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } +#endif /* 0 */ + + static void __init enable_IO_APIC(void) { struct IO_APIC_reg_01 reg_01; @@ -1874,7 +1887,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); if (edge_level) { - irq_desc[irq].handler = &ioapic_level_irq_type; + irq_desc[irq].handler = &ioapic_level_irq_type; } else { irq_desc[irq].handler = &ioapic_edge_irq_type; } @@ -1893,3 +1906,110 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a } #endif /*CONFIG_ACPI_BOOT*/ + +extern char opt_leveltrigger[], opt_edgetrigger[]; + +static int __init ioapic_trigger_setup(void) +{ + char *p; + irq_desc_t *desc; + long irq; + + p = opt_leveltrigger; + while ( *p != '\0' ) + { + irq = simple_strtol(p, &p, 10); + if ( (irq <= 0) || (irq >= NR_IRQS) ) + { + printk("IRQ '%ld' out of range in level-trigger list '%s'\n", + irq, opt_leveltrigger); + break; + } + + printk("Forcing IRQ %ld to level-trigger: ", irq); + + desc = &irq_desc[irq]; + spin_lock_irq(&desc->lock); + + if ( desc->handler == &ioapic_level_irq_type ) + { + printk("already level-triggered (no force applied).\n"); + } + else if ( desc->handler != &ioapic_edge_irq_type ) + { + printk("cannot force 
(can only force IO-APIC-edge IRQs).\n"); + } + else + { + desc->handler = &ioapic_level_irq_type; + __mask_IO_APIC_irq(irq); + __level_IO_APIC_irq(irq); + printk("done.\n"); + } + + spin_unlock_irq(&desc->lock); + + if ( *p == '\0' ) + break; + + if ( *p != ',' ) + { + printk("Unexpected character '%c' in level-trigger list '%s'\n", + *p, opt_leveltrigger); + break; + } + + p++; + } + + p = opt_edgetrigger; + while ( *p != '\0' ) + { + irq = simple_strtol(p, &p, 10); + if ( (irq <= 0) || (irq >= NR_IRQS) ) + { + printk("IRQ '%ld' out of range in edge-trigger list '%s'\n", + irq, opt_edgetrigger); + break; + } + + printk("Forcing IRQ %ld to edge-trigger: ", irq); + + desc = &irq_desc[irq]; + spin_lock_irq(&desc->lock); + + if ( desc->handler == &ioapic_edge_irq_type ) + { + printk("already edge-triggered (no force applied).\n"); + } + else if ( desc->handler != &ioapic_level_irq_type ) + { + printk("cannot force (can only force IO-APIC-level IRQs).\n"); + } + else + { + desc->handler = &ioapic_edge_irq_type; + __edge_IO_APIC_irq(irq); + desc->status |= IRQ_PENDING; /* may have lost a masked edge */ + printk("done.\n"); + } + + spin_unlock_irq(&desc->lock); + + if ( *p == '\0' ) + break; + + if ( *p != ',' ) + { + printk("Unexpected character '%c' in edge-trigger list '%s'\n", + *p, opt_edgetrigger); + break; + } + + p++; + } + + return 0; +} + +__initcall(ioapic_trigger_setup); diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c index d3eaf6af12..5b16bb0e63 100644 --- a/xen/arch/i386/irq.c +++ b/xen/arch/i386/irq.c @@ -39,6 +39,7 @@ #include <xen/delay.h> #include <xen/timex.h> #include <xen/perfc.h> +#include <asm/smpboot.h> /* * Linux has a controller-independent x86 interrupt architecture. @@ -1034,6 +1035,11 @@ int pirq_guest_bind(struct task_struct *p, int irq, int will_share) desc->status |= IRQ_GUEST; desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); desc->handler->startup(irq); + + /* Attempt to bind the interrupt target to the correct CPU. 
*/ + if ( desc->handler->set_affinity != NULL ) + desc->handler->set_affinity( + irq, apicid_to_phys_cpu_present(p->processor)); } else if ( !will_share || !action->shareable ) { diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 6cd567d601..dd6fcefa58 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -74,31 +74,37 @@ unsigned char opt_pdb[10] = "none"; unsigned int opt_tbuf_size = 1; /* opt_sched: scheduler - default to Borrowed Virtual Time */ char opt_sched[10] = "bvt"; -/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0 - * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */ -char opt_physdev_dom0_hide[20] = ""; +/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */ +/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */ +char opt_physdev_dom0_hide[200] = ""; +/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */ +/* level- or edge-triggered. */ +/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. 
*/ +char opt_leveltrigger[30] = "", opt_edgetrigger[30] = ""; static struct { unsigned char *name; enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL } type; void *var; } opts[] = { - { "console", OPT_STR, &opt_console }, - { "ser_baud", OPT_UINT, &opt_ser_baud }, - { "com1", OPT_STR, &opt_com1 }, - { "com2", OPT_STR, &opt_com2 }, - { "dom0_mem", OPT_UINT, &opt_dom0_mem }, - { "ifname", OPT_STR, &opt_ifname }, - { "noht", OPT_BOOL, &opt_noht }, - { "noacpi", OPT_BOOL, &opt_noacpi }, - { "nosmp", OPT_BOOL, &opt_nosmp }, - { "noreboot", OPT_BOOL, &opt_noreboot }, - { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables }, - { "watchdog", OPT_BOOL, &opt_watchdog }, - { "pdb", OPT_STR, &opt_pdb }, - { "tbuf_size", OPT_UINT, &opt_tbuf_size }, - { "sched", OPT_STR, &opt_sched }, - { "physdev_dom0_hide",OPT_STR, &opt_physdev_dom0_hide }, + { "console", OPT_STR, &opt_console }, + { "ser_baud", OPT_UINT, &opt_ser_baud }, + { "com1", OPT_STR, &opt_com1 }, + { "com2", OPT_STR, &opt_com2 }, + { "dom0_mem", OPT_UINT, &opt_dom0_mem }, + { "ifname", OPT_STR, &opt_ifname }, + { "noht", OPT_BOOL, &opt_noht }, + { "noacpi", OPT_BOOL, &opt_noacpi }, + { "nosmp", OPT_BOOL, &opt_nosmp }, + { "noreboot", OPT_BOOL, &opt_noreboot }, + { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables }, + { "watchdog", OPT_BOOL, &opt_watchdog }, + { "pdb", OPT_STR, &opt_pdb }, + { "tbuf_size", OPT_UINT, &opt_tbuf_size }, + { "sched", OPT_STR, &opt_sched }, + { "physdev_dom0_hide", OPT_STR, &opt_physdev_dom0_hide }, + { "leveltrigger", OPT_STR, &opt_leveltrigger }, + { "edgetrigger", OPT_STR, &opt_edgetrigger }, { NULL, 0, NULL } }; diff --git a/xen/common/physdev.c b/xen/common/physdev.c index d15183cb6e..61b7b22cb2 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -634,9 +634,10 @@ static long pci_probe_root_buses(u32 *busmask) */ long do_physdev_op(physdev_op_t *uop) { - phys_dev_t *pdev; + phys_dev_t *pdev; physdev_op_t op; - long ret; + long ret; + int irq; if ( unlikely(copy_from_user(&op, 
uop, sizeof(op)) != 0) ) return -EFAULT; @@ -674,10 +675,22 @@ long do_physdev_op(physdev_op_t *uop) ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask); break; - case PHYSDEVOP_UNMASK_IRQ: + case PHYSDEVOP_IRQ_UNMASK_NOTIFY: ret = pirq_guest_unmask(current); break; + case PHYSDEVOP_IRQ_STATUS_QUERY: + irq = op.u.irq_status_query.irq; + ret = -EINVAL; + if ( (irq < 0) || (irq >= NR_IRQS) ) + break; + op.u.irq_status_query.flags = 0; + /* Edge-triggered interrupts don't need an explicit unmask downcall. */ + if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL ) + op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY; + ret = 0; + break; + default: ret = -EINVAL; break; diff --git a/xen/include/hypervisor-ifs/physdev.h b/xen/include/hypervisor-ifs/physdev.h index 914a555981..50372bf2be 100644 --- a/xen/include/hypervisor-ifs/physdev.h +++ b/xen/include/hypervisor-ifs/physdev.h @@ -14,44 +14,55 @@ #define PHYSDEVOP_PCI_CFGREG_WRITE 1 #define PHYSDEVOP_PCI_INITIALISE_DEVICE 2 #define PHYSDEVOP_PCI_PROBE_ROOT_BUSES 3 -#define PHYSDEVOP_UNMASK_IRQ 4 +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 +#define PHYSDEVOP_IRQ_STATUS_QUERY 5 /* Read from PCI configuration space. */ -typedef struct physdevop_pci_cfgreg_read_st -{ - int bus; /* IN */ - int dev; /* IN */ - int func; /* IN */ - int reg; /* IN */ - int len; /* IN */ - u32 value; /* OUT */ +typedef struct { + /* IN */ + int bus; + int dev; + int func; + int reg; + int len; + /* OUT */ + u32 value; } physdevop_pci_cfgreg_read_t; /* Write to PCI configuration space. */ -typedef struct physdevop_pci_cfgreg_write_st -{ - int bus; /* IN */ - int dev; /* IN */ - int func; /* IN */ - int reg; /* IN */ - int len; /* IN */ - u32 value; /* IN */ +typedef struct { + /* IN */ + int bus; + int dev; + int func; + int reg; + int len; + u32 value; } physdevop_pci_cfgreg_write_t; /* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). 
*/ -typedef struct physdevop_pci_initialise_device_st -{ - int bus; /* IN */ - int dev; /* IN */ - int func; /* IN */ +typedef struct { + /* IN */ + int bus; + int dev; + int func; } physdevop_pci_initialise_device_t; /* Find the root buses for subsequent scanning. */ -typedef struct physdevop_pci_probe_root_buses_st -{ - u32 busmask[256/32]; /* OUT */ +typedef struct { + /* OUT */ + u32 busmask[256/32]; } physdevop_pci_probe_root_buses_t; +typedef struct { + /* IN */ + int irq; + /* OUT */ +/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? */ +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0) + unsigned long flags; +} physdevop_irq_status_query_t; + typedef struct _physdev_op_st { unsigned long cmd; @@ -61,6 +72,7 @@ typedef struct _physdev_op_st physdevop_pci_cfgreg_write_t pci_cfgreg_write; physdevop_pci_initialise_device_t pci_initialise_device; physdevop_pci_probe_root_buses_t pci_probe_root_buses; + physdevop_irq_status_query_t irq_status_query; } u; } physdev_op_t; diff --git a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c index 62a4adf27d..8178608959 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c @@ -116,12 +116,14 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) /* * We do not copy the packet unless: - * 1. It is fragmented; or + * 1. The data is shared; or * 2. It spans a page boundary; or * 3. We cannot be sure the whole data page is allocated. * The copying method is taken from skb_copy(). + * NB. We also couldn't cope with fragmented packets, but we won't get + * any because we not advertise the NETIF_F_SG feature. 
*/ - if ( (skb_shinfo(skb)->nr_frags != 0) || + if ( skb_shared(skb) || skb_cloned(skb) || (((unsigned long)skb->end ^ (unsigned long)skb->head) & PAGE_MASK) || ((skb->end - skb->head) < (PAGE_SIZE/2)) ) { diff --git a/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c index 1d70d00bb5..fc6f22fc34 100644 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/evtchn.c @@ -36,6 +36,9 @@ static int virq_to_irq[NR_VIRQS]; /* Reference counts for bindings to IRQs. */ static int irq_bindcount[NR_IRQS]; +/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ +static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)]; + /* Upcall to generic IRQ layer. */ extern asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs); @@ -234,8 +237,22 @@ static struct hw_interrupt_type dynirq_type = { static inline void pirq_unmask_notify(int pirq) { physdev_op_t op; - op.cmd = PHYSDEVOP_UNMASK_IRQ; + if ( unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])) ) + { + op.cmd = PHYSDEVOP_IRQ_UNMASK_NOTIFY; + (void)HYPERVISOR_physdev_op(&op); + } +} + +static inline void pirq_query_unmask(int pirq) +{ + physdev_op_t op; + op.cmd = PHYSDEVOP_IRQ_STATUS_QUERY; + op.u.irq_status_query.irq = pirq; (void)HYPERVISOR_physdev_op(&op); + clear_bit(pirq, &pirq_needs_unmask_notify[0]); + if ( op.u.irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) + set_bit(pirq, &pirq_needs_unmask_notify[0]); } /* @@ -261,6 +278,8 @@ static unsigned int startup_pirq(unsigned int irq) } evtchn = op.u.bind_pirq.port; + pirq_query_unmask(irq_to_pirq(irq)); + evtchn_to_irq[evtchn] = irq; irq_to_evtchn[irq] = evtchn; diff --git a/xenolinux-2.4.26-sparse/include/linux/skbuff.h b/xenolinux-2.4.26-sparse/include/linux/skbuff.h new file mode 100644 index 0000000000..8c2b49b03e --- /dev/null +++ b/xenolinux-2.4.26-sparse/include/linux/skbuff.h @@ -0,0 +1,1174 @@ +/* + * 
Definitions for the 'struct sk_buff' memory handlers. + * + * Authors: + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_SKBUFF_H +#define _LINUX_SKBUFF_H + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/cache.h> + +#include <asm/atomic.h> +#include <asm/types.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +#define HAVE_ALLOC_SKB /* For the drivers to know */ +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ +#define SLAB_SKB /* Slabified skbuffs */ + +#define CHECKSUM_NONE 0 +#define CHECKSUM_HW 1 +#define CHECKSUM_UNNECESSARY 2 + +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) + +/* A. Checksumming of received packets by device. + * + * NONE: device failed to checksum this packet. + * skb->csum is undefined. + * + * UNNECESSARY: device parsed packet and wouldbe verified checksum. + * skb->csum is undefined. + * It is bad option, but, unfortunately, many of vendors do this. + * Apparently with secret goal to sell you new device, when you + * will add new protocol to your host. F.e. IPv6. 8) + * + * HW: the most generic way. Device supplied checksum of _all_ + * the packet as seen by netif_rx in skb->csum. + * NOTE: Even if device supports only some protocols, but + * is able to produce some skb->csum, it MUST use HW, + * not UNNECESSARY. + * + * B. Checksumming on output. 
+ * + * NONE: skb is checksummed by protocol or csum is not required. + * + * HW: device is required to csum packet as seen by hard_start_xmit + * from skb->h.raw to the end and to record the checksum + * at skb->h.raw+skb->csum. + * + * Device must show its capabilities in dev->features, set + * at device setup time. + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum + * everything. + * NETIF_F_NO_CSUM - loopback or reliable single hop media. + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only + * TCP/UDP over IPv4. Sigh. Vendors like this + * way by an unknown reason. Though, see comment above + * about CHECKSUM_UNNECESSARY. 8) + * + * Any questions? No questions, good. --ANK + */ + +#ifdef __i386__ +#define NET_CALLER(arg) (*(((void**)&arg)-1)) +#else +#define NET_CALLER(arg) __builtin_return_address(0) +#endif + +#ifdef CONFIG_NETFILTER +struct nf_conntrack { + atomic_t use; + void (*destroy)(struct nf_conntrack *); +}; + +struct nf_ct_info { + struct nf_conntrack *master; +}; +#endif + +struct sk_buff_head { + /* These two members must be first. */ + struct sk_buff * next; + struct sk_buff * prev; + + __u32 qlen; + spinlock_t lock; +}; + +struct sk_buff; + +#define MAX_SKB_FRAGS 6 + +typedef struct skb_frag_struct skb_frag_t; + +struct skb_frag_struct +{ + struct page *page; + __u16 page_offset; + __u16 size; +}; + +/* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +struct skb_shared_info { + atomic_t dataref; + unsigned int nr_frags; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct sk_buff { + /* These two members must be first. 
*/ + struct sk_buff * next; /* Next buffer in list */ + struct sk_buff * prev; /* Previous buffer in list */ + + struct sk_buff_head * list; /* List we are on */ + struct sock *sk; /* Socket we are owned by */ + struct timeval stamp; /* Time we arrived */ + struct net_device *dev; /* Device we arrived on/are leaving by */ + struct net_device *real_dev; /* For support of point to point protocols + (e.g. 802.3ad) over bonding, we must save the + physical device that got the packet before + replacing skb->dev with the virtual device. */ + + /* Transport layer header */ + union + { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + struct spxhdr *spxh; + unsigned char *raw; + } h; + + /* Network layer header */ + union + { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arphdr *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; + + /* Link layer header */ + union + { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + + struct dst_entry *dst; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48]; + + unsigned int len; /* Length of actual data */ + unsigned int data_len; + unsigned int csum; /* Checksum */ + unsigned char __unused, /* Dead field, may be reused */ + cloned, /* head may be cloned (check refcnt to be sure). */ + pkt_type, /* Packet class */ + ip_summed; /* Driver fed us an IP checksum */ + __u32 priority; /* Packet queueing priority */ + atomic_t users; /* User count - see datagram.c,tcp.c */ + unsigned short protocol; /* Packet protocol from driver. 
*/ + unsigned short security; /* Security level of packet */ + unsigned int truesize; /* Buffer size */ + + unsigned char *head; /* Head of buffer */ + unsigned char *data; /* Data head pointer */ + unsigned char *tail; /* Tail pointer */ + unsigned char *end; /* End pointer */ + + void (*destructor)(struct sk_buff *); /* Destruct function */ +#ifdef CONFIG_NETFILTER + /* Can be used for communication between hooks. */ + unsigned long nfmark; + /* Cache info */ + __u32 nfcache; + /* Associated connection, if any */ + struct nf_ct_info *nfct; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int nf_debug; +#endif +#endif /*CONFIG_NETFILTER*/ + +#if defined(CONFIG_HIPPI) + union{ + __u32 ifield; + } private; +#endif + +#ifdef CONFIG_NET_SCHED + __u32 tc_index; /* traffic control index */ +#endif +}; + +#ifdef __KERNEL__ +/* + * Handling routines are only of interest to the kernel + */ +#include <linux/slab.h> + +#include <asm/system.h> + +extern void __kfree_skb(struct sk_buff *skb); +extern struct sk_buff * alloc_skb(unsigned int size, int priority); +extern void kfree_skbmem(struct sk_buff *skb); +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, + int newheadroom, + int newtailroom, + int priority); +extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); +#define dev_kfree_skb(a) kfree_skb(a) +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); + +/* Internal */ +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) + +/** + * skb_queue_empty - check if a 
queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + */ + +static inline int skb_queue_empty(struct sk_buff_head *list) +{ + return (list->next == (struct sk_buff *) list); +} + +/** + * skb_get - reference buffer + * @skb: buffer to reference + * + * Makes another reference to a socket buffer and returns a pointer + * to the buffer. + */ + +static inline struct sk_buff *skb_get(struct sk_buff *skb) +{ + atomic_inc(&skb->users); + return skb; +} + +/* + * If users==1, we are the only owner and are can avoid redundant + * atomic change. + */ + +/** + * kfree_skb - free an sk_buff + * @skb: buffer to free + * + * Drop a reference to the buffer and free it if the usage count has + * hit zero. + */ + +static inline void kfree_skb(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + __kfree_skb(skb); +} + +/* Use this if you didn't touch the skb state [for fast switching] */ +static inline void kfree_skb_fast(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + kfree_skbmem(skb); +} + +/** + * skb_cloned - is the buffer a clone + * @skb: buffer to check + * + * Returns true if the buffer was generated with skb_clone() and is + * one of multiple shared copies of the buffer. Cloned buffers are + * shared data so must not be written to under normal circumstances. + */ + +static inline int skb_cloned(struct sk_buff *skb) +{ + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; +} + +/** + * skb_shared - is the buffer shared + * @skb: buffer to check + * + * Returns true if more than one person has a reference to this + * buffer. 
+ */ + +static inline int skb_shared(struct sk_buff *skb) +{ + return (atomic_read(&skb->users) != 1); +} + +/** + * skb_share_check - check if buffer is shared and if so clone it + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the buffer is shared the buffer is cloned and the old copy + * drops a reference. A new clone with a single reference is returned. + * If the buffer is not shared the original buffer is returned. When + * being called from interrupt status or with spinlocks held pri must + * be GFP_ATOMIC. + * + * NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) +{ + if (skb_shared(skb)) { + struct sk_buff *nskb; + nskb = skb_clone(skb, pri); + kfree_skb(skb); + return nskb; + } + return skb; +} + + +/* + * Copy shared buffers into a new sk_buff. We effectively do COW on + * packets to handle cases where we have a local reader and forward + * and a couple of other messy ones. The normal one is tcpdumping + * a packet thats being forwarded. + */ + +/** + * skb_unshare - make a copy of a shared buffer + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the socket buffer is a clone then this function creates a new + * copy of the data, drops a reference count on the old copy and returns + * the new copy with the reference count at 1. If the buffer is not a clone + * the original buffer is returned. When called with a spinlock held or + * from interrupt state @pri must be %GFP_ATOMIC + * + * %NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) +{ + struct sk_buff *nskb; + if(!skb_cloned(skb)) + return skb; + nskb=skb_copy(skb, pri); + kfree_skb(skb); /* Free our shared copy */ + return nskb; +} + +/** + * skb_peek + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. 
A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the head element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->next; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_peek_tail + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the tail element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->prev; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_queue_len - get queue length + * @list_: list to measure + * + * Return the length of an &sk_buff queue. + */ + +static inline __u32 skb_queue_len(struct sk_buff_head *list_) +{ + return(list_->qlen); +} + +static inline void skb_queue_head_init(struct sk_buff_head *list) +{ + spin_lock_init(&list->lock); + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; + list->qlen = 0; +} + +/* + * Insert an sk_buff at the start of a list. + * + * The "__skb_xxxx()" functions are the non-atomic ones that + * can only be called with interrupts disabled. + */ + +/** + * __skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of a list. 
This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + prev = (struct sk_buff *)list; + next = prev->next; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + + +/** + * skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_head(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the end of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + next = (struct sk_buff *)list; + prev = next->prev; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + +/** + * skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the tail of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. 
+ * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_tail(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. This function does not take any locks + * so must be used with appropriate locks held only. The head item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) +{ + struct sk_buff *next, *prev, *result; + + prev = (struct sk_buff *) list; + next = prev->next; + result = NULL; + if (next != prev) { + result = next; + next = next->next; + list->qlen--; + next->prev = prev; + prev->next = next; + result->next = NULL; + result->prev = NULL; + result->list = NULL; + } + return result; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The head item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) +{ + unsigned long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +/* + * Insert a packet on a list. 
+ */ + +static inline void __skb_insert(struct sk_buff *newsk, + struct sk_buff * prev, struct sk_buff *next, + struct sk_buff_head * list) +{ + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; + newsk->list = list; + list->qlen++; +} + +/** + * skb_insert - insert a buffer + * @old: buffer to insert before + * @newsk: buffer to insert + * + * Place a packet before a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_insert(newsk, old->prev, old, old->list); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * Place a packet after a given packet in a list. + */ + +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + __skb_insert(newsk, old, old->next, old->list); +} + +/** + * skb_append - append a buffer + * @old: buffer to insert after + * @newsk: buffer to insert + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls. + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_append(old, newsk); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * remove sk_buff from list. _Must_ be called atomically, and with + * the list known.. 
+ */ + +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) +{ + struct sk_buff * next, * prev; + + list->qlen--; + next = skb->next; + prev = skb->prev; + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + next->prev = prev; + prev->next = next; +} + +/** + * skb_unlink - remove a buffer from a list + * @skb: buffer to remove + * + * Remove a packet from a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * + * Works even without knowing the list it is sitting on, which can be + * handy at times. It also means that THE LIST MUST EXIST when you + * unlink. Thus a list must have its contents unlinked before it is + * destroyed. + */ + +static inline void skb_unlink(struct sk_buff *skb) +{ + struct sk_buff_head *list = skb->list; + + if(list) { + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + if(skb->list == list) + __skb_unlink(skb, skb->list); + spin_unlock_irqrestore(&list->lock, flags); + } +} + +/* XXX: more streamlined implementation */ + +/** + * __skb_dequeue_tail - remove from the tail of the queue + * @list: list to dequeue from + * + * Remove the tail of the list. This function does not take any locks + * so must be used with appropriate locks held only. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) +{ + struct sk_buff *skb = skb_peek_tail(list); + if (skb) + __skb_unlink(skb, list); + return skb; +} + +/** + * skb_dequeue_tail - remove from the tail of the queue + * @list: list to dequeue from + * + * Remove the tail of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The tail item is + * returned or %NULL if the list is empty. 
+ */ + +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) +{ + unsigned long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue_tail(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +static inline int skb_is_nonlinear(const struct sk_buff *skb) +{ + return skb->data_len; +} + +static inline unsigned int skb_headlen(const struct sk_buff *skb) +{ + return skb->len - skb->data_len; +} + +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) + +/* + * Add data to an sk_buff + */ + +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + return tmp; +} + +/** + * skb_put - add data to a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer. If this would + * exceed the total buffer size the kernel will panic. A pointer to the + * first byte of the extra data is returned. + */ + +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + if(skb->tail>skb->end) { + skb_over_panic(skb, len, current_text_addr()); + } + return tmp; +} + +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + return skb->data; +} + +/** + * skb_push - add data to the start of a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer at the buffer + * start. If this would exceed the total buffer headroom the kernel will + * panic. 
A pointer to the first byte of the extra data is returned. + */ + +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + if(skb->data<skb->head) { + skb_under_panic(skb, len, current_text_addr()); + } + return skb->data; +} + +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) +{ + skb->len-=len; + if (skb->len < skb->data_len) + out_of_line_bug(); + return skb->data+=len; +} + +/** + * skb_pull - remove data from the start of a buffer + * @skb: buffer to use + * @len: amount of data to remove + * + * This function removes data from the start of a buffer, returning + * the memory to the headroom. A pointer to the next data in the buffer + * is returned. Once the data has been pulled future pushes will overwrite + * the old data. + */ + +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __skb_pull(skb,len); +} + +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); + +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb_headlen(skb) && + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) + return NULL; + skb->len -= len; + return skb->data += len; +} + +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __pskb_pull(skb,len); +} + +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +{ + if (len <= skb_headlen(skb)) + return 1; + if (len > skb->len) + return 0; + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); +} + +/** + * skb_headroom - bytes at buffer head + * @skb: buffer to check + * + * Return the number of bytes of free space at the head of an &sk_buff. 
+ */ + +static inline int skb_headroom(const struct sk_buff *skb) +{ + return skb->data-skb->head; +} + +/** + * skb_tailroom - bytes at buffer end + * @skb: buffer to check + * + * Return the number of bytes of free space at the tail of an sk_buff + */ + +static inline int skb_tailroom(const struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; +} + +/** + * skb_reserve - adjust headroom + * @skb: buffer to alter + * @len: bytes to move + * + * Increase the headroom of an empty &sk_buff by reducing the tail + * room. This is only allowed for an empty buffer. + */ + +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) +{ + skb->data+=len; + skb->tail+=len; +} + +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); + +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + } else { + ___pskb_trim(skb, len, 0); + } +} + +/** + * skb_trim - remove end from a buffer + * @skb: buffer to alter + * @len: new length + * + * Cut the length of a buffer down by removing data from the tail. If + * the buffer is already under the length specified it is not modified. + */ + +static inline void skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (skb->len > len) { + __skb_trim(skb, len); + } +} + + +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + return 0; + } else { + return ___pskb_trim(skb, len, 1); + } +} + +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (len < skb->len) + return __pskb_trim(skb, len); + return 0; +} + +/** + * skb_orphan - orphan a buffer + * @skb: buffer to orphan + * + * If a buffer currently has an owner then we call the owner's + * destructor function and make the @skb unowned. The buffer continues + * to exist but is no longer charged to its former owner. 
+ */ + + +static inline void skb_orphan(struct sk_buff *skb) +{ + if (skb->destructor) + skb->destructor(skb); + skb->destructor = NULL; + skb->sk = NULL; +} + +/** + * skb_queue_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function takes the list + * lock and is atomic with respect to other list locking functions. + */ + + +static inline void skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __skb_queue_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function does not take the + * list lock and the caller must hold the relevant locks to use it. + */ + + +static inline void __skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=__skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. + */ + +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, + int gfp_mask) +{ + struct sk_buff *skb; +#if defined(CONFIG_XEN) + length = (PAGE_SIZE/2)+1; /* force slab allocator to give us a page */ +#endif + skb = alloc_skb(length+16, gfp_mask); + if (skb) + skb_reserve(skb,16); + return skb; +} + +/** + * dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. 
The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ + +static inline struct sk_buff *dev_alloc_skb(unsigned int length) +{ + return __dev_alloc_skb(length, GFP_ATOMIC); +} + +/** + * skb_cow - copy header of skb when it is required + * @skb: buffer to cow + * @headroom: needed headroom + * + * If the skb passed lacks sufficient headroom or its data part + * is shared, data is reallocated. If reallocation fails, an error + * is returned and original skb is not changed. + * + * The result is skb with writable area skb->head...skb->tail + * and at least @headroom of space at head. + */ + +static inline int +skb_cow(struct sk_buff *skb, unsigned int headroom) +{ + int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + + if (delta < 0) + delta = 0; + + if (delta || skb_cloned(skb)) + return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); + return 0; +} + +/** + * skb_padto - pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Returns the buffer, which may be a replacement + * for the original, or NULL for out of memory - in which case + * the original buffer is still freed. 
+ */ + +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + if(likely(size >= len)) + return skb; + return skb_pad(skb, len-size); +} + +/** + * skb_linearize - convert paged skb to linear one + * @skb: buffer to linearize + * @gfp: allocation mode + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. */ +int skb_linearize(struct sk_buff *skb, int gfp); + +static inline void *kmap_skb_frag(const skb_frag_t *frag) +{ +#ifdef CONFIG_HIGHMEM + if (in_irq()) + out_of_line_bug(); + + local_bh_disable(); +#endif + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); +} + +static inline void kunmap_skb_frag(void *vaddr) +{ + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); +#ifdef CONFIG_HIGHMEM + local_bh_enable(); +#endif +} + +#define skb_queue_walk(queue, skb) \ + for (skb = (queue)->next; \ + (skb != (struct sk_buff *)(queue)); \ + skb=skb->next) + + +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); + +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); +extern void 
skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); + +extern void skb_init(void); +extern void skb_add_mtu(int mtu); + +#ifdef CONFIG_NETFILTER +static inline void +nf_conntrack_put(struct nf_ct_info *nfct) +{ + if (nfct && atomic_dec_and_test(&nfct->master->use)) + nfct->master->destroy(nfct->master); +} +static inline void +nf_conntrack_get(struct nf_ct_info *nfct) +{ + if (nfct) + atomic_inc(&nfct->master->use); +} +#endif + +#endif /* __KERNEL__ */ +#endif /* _LINUX_SKBUFF_H */ |