author     kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2004-08-19 18:50:23 +0000
committer  kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>  2004-08-19 18:50:23 +0000
commit     dc8c5eb262005ea0461c9ce4c079077fa39a02b6 (patch)
tree       9d3b53ed6661164c40a83f2834796a06ac9215c6
parent     b148045b89453607a9f267bb9a77997846ba45b5 (diff)
download   xen-dc8c5eb262005ea0461c9ce4c079077fa39a02b6.tar.gz
           xen-dc8c5eb262005ea0461c9ce4c079077fa39a02b6.tar.bz2
           xen-dc8c5eb262005ea0461c9ce4c079077fa39a02b6.zip
bitkeeper revision 1.1159.42.8 (4124f66fUINxrel-POThC1of633DIA)
Clean up network-backend driver changes to common files. There is now a 'foreign page' hook into the page allocator, and a CONFIG_ option for forcing page-sized rx skbs.
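
For orientation, a minimal sketch of how a backend driver is expected to use the new hook; the function names below are illustrative and not part of the patch. The driver marks a page it owns as PG_foreign and stores its release callback through PageForeignDestructor(), which aliases page->mapping; when the page's use count later drops to zero, the page allocator invokes that callback instead of returning the page to the buddy allocator.

    #include <linux/mm.h>  /* 2.4: defines the PG_foreign helpers; on 2.6 they live in page-flags.h */

    /* Illustrative registration, mirroring what netback_init() does below. */
    static void my_register_foreign(struct page *page,
                                    void (*release)(struct page *))
    {
        SetPageForeign(page);                   /* mark the page as foreign-allocated */
        PageForeignDestructor(page) = release;  /* callback is stashed in page->mapping */
    }
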
-rw-r--r--  .rootkeys                                                  3
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/config.in                 5
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0            1
-rw-r--r--  linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU            1
-rw-r--r--  linux-2.4.26-xen-sparse/include/linux/mm.h                 703
-rw-r--r--  linux-2.4.26-xen-sparse/mm/page_alloc.c                    5
-rw-r--r--  linux-2.6.7-xen-sparse/arch/xen/Kconfig                    10
-rw-r--r--  linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig     2
-rw-r--r--  linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig     2
-rw-r--r--  linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c       29
-rw-r--r--  linux-2.6.7-xen-sparse/include/linux/page-flags.h          343
-rw-r--r--  linux-2.6.7-xen-sparse/include/linux/skbuff.h              1073
-rw-r--r--  linux-2.6.7-xen-sparse/mm/page_alloc.c                     5
13 files changed, 2160 insertions, 22 deletions
diff --git a/.rootkeys b/.rootkeys
index 3b7fe9a960..57822087e0 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -111,6 +111,7 @@
3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.26-xen-sparse/include/asm-xen/vga.h
40659defgWA92arexpMGn8X3QMDj3w linux-2.4.26-xen-sparse/include/asm-xen/xor.h
3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.26-xen-sparse/include/linux/blk.h
+4124f66fPHG6yvB_vXmesjvzrJ3yMg linux-2.4.26-xen-sparse/include/linux/mm.h
401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.26-xen-sparse/include/linux/sched.h
40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.26-xen-sparse/include/linux/skbuff.h
401c0592pLrp_aCbQRo9GXiYQQaVVA linux-2.4.26-xen-sparse/include/linux/timer.h
@@ -243,6 +244,8 @@
3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.7-xen-sparse/include/asm-xen/suspend.h
3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h
4124d8c4aocX7A-jIbuGraWN84pxGQ linux-2.6.7-xen-sparse/include/linux/bio.h
+4124f66fp5QwbDHEfoUIa7pqO5Xhag linux-2.6.7-xen-sparse/include/linux/page-flags.h
+4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.7-xen-sparse/include/linux/skbuff.h
40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.7-xen-sparse/mkbuildtree
410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.7-xen-sparse/mm/page_alloc.c
40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Make.defs
diff --git a/linux-2.4.26-xen-sparse/arch/xen/config.in b/linux-2.4.26-xen-sparse/arch/xen/config.in
index 3a89393de3..daed22d543 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/config.in
+++ b/linux-2.4.26-xen-sparse/arch/xen/config.in
@@ -20,7 +20,10 @@ endmenu
# The IBM S/390 patch needs this.
define_bool CONFIG_NO_IDLE_HZ y
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" != "y" ]; then
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" == "y" ]; then
+ define_bool CONFIG_FOREIGN_PAGES y
+else
+ define_bool CONFIG_FOREIGN_PAGES n
define_bool CONFIG_NETDEVICES y
define_bool CONFIG_VT n
fi
diff --git a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0 b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0
index 0754c9b8c8..c51c2210b4 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0
+++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0
@@ -13,6 +13,7 @@ CONFIG_UID16=y
CONFIG_XEN_PRIVILEGED_GUEST=y
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_NO_IDLE_HZ=y
+CONFIG_FOREIGN_PAGES=y
#
# Code maturity level options
diff --git a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU
index 8fad307cd1..9d5fdccdf8 100644
--- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU
+++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU
@@ -13,6 +13,7 @@ CONFIG_UID16=y
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
CONFIG_NO_IDLE_HZ=y
+# CONFIG_FOREIGN_PAGES is not set
CONFIG_NETDEVICES=y
# CONFIG_VT is not set
diff --git a/linux-2.4.26-xen-sparse/include/linux/mm.h b/linux-2.4.26-xen-sparse/include/linux/mm.h
new file mode 100644
index 0000000000..b24c6d6163
--- /dev/null
+++ b/linux-2.4.26-xen-sparse/include/linux/mm.h
@@ -0,0 +1,703 @@
+#ifndef _LINUX_MM_H
+#define _LINUX_MM_H
+
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+#include <linux/rbtree.h>
+
+extern unsigned long max_mapnr;
+extern unsigned long num_physpages;
+extern unsigned long num_mappedpages;
+extern void * high_memory;
+extern int page_cluster;
+/* The inactive_clean lists are per zone. */
+extern struct list_head active_list;
+extern struct list_head inactive_list;
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/atomic.h>
+
+/*
+ * Linux kernel virtual memory manager primitives.
+ * The idea being to have a "virtual" mm in the same way
+ * we have a virtual fs - giving a cleaner interface to the
+ * mm details, and allowing different kinds of memory mappings
+ * (from shared memory to executable loading to arbitrary
+ * mmap() functions).
+ */
+
+/*
+ * This struct defines a memory VMM memory area. There is one of these
+ * per VM-area/task. A VM area is any part of the process virtual memory
+ * space that has a special rule for the page-fault handlers (ie a shared
+ * library, the executable area etc).
+ */
+struct vm_area_struct {
+ struct mm_struct * vm_mm; /* The address space we belong to. */
+ unsigned long vm_start; /* Our start address within vm_mm. */
+ unsigned long vm_end; /* The first byte after our end address
+ within vm_mm. */
+
+ /* linked list of VM areas per task, sorted by address */
+ struct vm_area_struct *vm_next;
+
+ pgprot_t vm_page_prot; /* Access permissions of this VMA. */
+ unsigned long vm_flags; /* Flags, listed below. */
+
+ rb_node_t vm_rb;
+
+ /*
+ * For areas with an address space and backing store,
+ * one of the address_space->i_mmap{,shared} lists,
+ * for shm areas, the list of attaches, otherwise unused.
+ */
+ struct vm_area_struct *vm_next_share;
+ struct vm_area_struct **vm_pprev_share;
+
+ /* Function pointers to deal with this struct. */
+ struct vm_operations_struct * vm_ops;
+
+ /* Information about our backing store: */
+ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
+ units, *not* PAGE_CACHE_SIZE */
+ struct file * vm_file; /* File we map to (can be NULL). */
+ unsigned long vm_raend; /* XXX: put full readahead info here. */
+ void * vm_private_data; /* was vm_pte (shared mem) */
+};
+
+/*
+ * vm_flags..
+ */
+#define VM_READ 0x00000001 /* currently active flags */
+#define VM_WRITE 0x00000002
+#define VM_EXEC 0x00000004
+#define VM_SHARED 0x00000008
+
+#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */
+#define VM_MAYWRITE 0x00000020
+#define VM_MAYEXEC 0x00000040
+#define VM_MAYSHARE 0x00000080
+
+#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
+#define VM_GROWSUP 0x00000200
+#define VM_SHM 0x00000400 /* shared memory area, don't swap out */
+#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
+
+#define VM_EXECUTABLE 0x00001000
+#define VM_LOCKED 0x00002000
+#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
+
+ /* Used by sys_madvise() */
+#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */
+#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */
+
+#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
+#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
+#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
+
+#ifndef VM_STACK_FLAGS
+#define VM_STACK_FLAGS 0x00000177
+#endif
+
+#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ)
+#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK
+#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK))
+#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ)
+#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ)
+
+/* read ahead limits */
+extern int vm_min_readahead;
+extern int vm_max_readahead;
+
+/*
+ * mapping from the currently active vm_flags protection bits (the
+ * low four bits) to a page protection mask..
+ */
+extern pgprot_t protection_map[16];
+
+
+/*
+ * These are the virtual MM functions - opening of an area, closing and
+ * unmapping it (needed to keep files on disk up-to-date etc), pointer
+ * to the functions called when a no-page or a wp-page exception occurs.
+ */
+struct vm_operations_struct {
+ void (*open)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct * area);
+ struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused);
+};
+
+/*
+ * Each physical page in the system has a struct page associated with
+ * it to keep track of whatever it is we are using the page for at the
+ * moment. Note that we have no way to track which tasks are using
+ * a page.
+ *
+ * Try to keep the most commonly accessed fields in single cache lines
+ * here (16 bytes or greater). This ordering should be particularly
+ * beneficial on 32-bit processors.
+ *
+ * The first line is data used in page cache lookup, the second line
+ * is used for linear searches (eg. clock algorithm scans).
+ *
+ * TODO: make this structure smaller, it could be as small as 32 bytes.
+ */
+typedef struct page {
+ struct list_head list; /* ->mapping has some page lists. */
+ struct address_space *mapping; /* The inode (or ...) we belong to. */
+ unsigned long index; /* Our offset within mapping. */
+ struct page *next_hash; /* Next page sharing our hash bucket in
+ the pagecache hash table. */
+ atomic_t count; /* Usage count, see below. */
+ unsigned long flags; /* atomic flags, some possibly
+ updated asynchronously */
+ struct list_head lru; /* Pageout list, eg. active_list;
+ protected by pagemap_lru_lock !! */
+ struct page **pprev_hash; /* Complement to *next_hash. */
+ struct buffer_head * buffers; /* Buffer maps us to a disk block. */
+
+ /*
+ * On machines where all RAM is mapped into kernel address space,
+ * we can simply calculate the virtual address. On machines with
+ * highmem some memory is mapped into kernel virtual memory
+ * dynamically, so we need a place to store that address.
+ * Note that this field could be 16 bits on x86 ... ;)
+ *
+ * Architectures with slow multiplication can define
+ * WANT_PAGE_VIRTUAL in asm/page.h
+ */
+#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+ void *virtual; /* Kernel virtual address (NULL if
+ not kmapped, ie. highmem) */
+#endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */
+} mem_map_t;
+
+/*
+ * Methods to modify the page usage count.
+ *
+ * What counts for a page usage:
+ * - cache mapping (page->mapping)
+ * - disk mapping (page->buffers)
+ * - page mapped in a task's page tables, each mapping
+ * is counted separately
+ *
+ * Also, many kernel routines increase the page count before a critical
+ * routine so they can be sure the page doesn't go away from under them.
+ */
+#define get_page(p) atomic_inc(&(p)->count)
+#define put_page(p) __free_page(p)
+#define put_page_testzero(p) atomic_dec_and_test(&(p)->count)
+#define page_count(p) atomic_read(&(p)->count)
+#define set_page_count(p,v) atomic_set(&(p)->count, v)
+
+/*
+ * Various page->flags bits:
+ *
+ * PG_reserved is set for special pages, which can never be swapped
+ * out. Some of them might not even exist (eg empty_bad_page)...
+ *
+ * Multiple processes may "see" the same page. E.g. for untouched
+ * mappings of /dev/null, all processes see the same page full of
+ * zeroes, and text pages of executables and shared libraries have
+ * only one copy in memory, at most, normally.
+ *
+ * For the non-reserved pages, page->count denotes a reference count.
+ * page->count == 0 means the page is free.
+ * page->count == 1 means the page is used for exactly one purpose
+ * (e.g. a private data page of one process).
+ *
+ * A page may be used for kmalloc() or anyone else who does a
+ * __get_free_page(). In this case the page->count is at least 1, and
+ * all other fields are unused but should be 0 or NULL. The
+ * management of this page is the responsibility of the one who uses
+ * it.
+ *
+ * The other pages (we may call them "process pages") are completely
+ * managed by the Linux memory manager: I/O, buffers, swapping etc.
+ * The following discussion applies only to them.
+ *
+ * A page may belong to an inode's memory mapping. In this case,
+ * page->mapping is the pointer to the inode, and page->index is the
+ * file offset of the page, in units of PAGE_CACHE_SIZE.
+ *
+ * A page may have buffers allocated to it. In this case,
+ * page->buffers is a circular list of these buffer heads. Else,
+ * page->buffers == NULL.
+ *
+ * For pages belonging to inodes, the page->count is the number of
+ * attaches, plus 1 if buffers are allocated to the page, plus one
+ * for the page cache itself.
+ *
+ * All pages belonging to an inode are in these doubly linked lists:
+ * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
+ * using the page->list list_head. These fields are also used for
+ * freelist managemet (when page->count==0).
+ *
+ * There is also a hash table mapping (mapping,index) to the page
+ * in memory if present. The lists for this hash table use the fields
+ * page->next_hash and page->pprev_hash.
+ *
+ * All process pages can do I/O:
+ * - inode pages may need to be read from disk,
+ * - inode pages which have been modified and are MAP_SHARED may need
+ * to be written to disk,
+ * - private pages which have been modified may need to be swapped out
+ * to swap space and (later) to be read back into memory.
+ * During disk I/O, PG_locked is used. This bit is set before I/O
+ * and reset when I/O completes. page_waitqueue(page) is a wait queue of all
+ * tasks waiting for the I/O on this page to complete.
+ * PG_uptodate tells whether the page's contents is valid.
+ * When a read completes, the page becomes uptodate, unless a disk I/O
+ * error happened.
+ *
+ * For choosing which pages to swap out, inode pages carry a
+ * PG_referenced bit, which is set any time the system accesses
+ * that page through the (mapping,index) hash table. This referenced
+ * bit, together with the referenced bit in the page tables, is used
+ * to manipulate page->age and move the page across the active,
+ * inactive_dirty and inactive_clean lists.
+ *
+ * Note that the referenced bit, the page->lru list_head and the
+ * active, inactive_dirty and inactive_clean lists are protected by
+ * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
+ *
+ * PG_skip is used on sparc/sparc64 architectures to "skip" certain
+ * parts of the address space.
+ *
+ * PG_error is set to indicate that an I/O error occurred on this page.
+ *
+ * PG_arch_1 is an architecture specific page state bit. The generic
+ * code guarantees that this bit is cleared for a page when it first
+ * is entered into the page cache.
+ *
+ * PG_highmem pages are not permanently mapped into the kernel virtual
+ * address space, they need to be kmapped separately for doing IO on
+ * the pages. The struct page (these bits with information) are always
+ * mapped into kernel address space...
+ */
+#define PG_locked 0 /* Page is locked. Don't touch. */
+#define PG_error 1
+#define PG_referenced 2
+#define PG_uptodate 3
+#define PG_dirty 4
+#define PG_unused 5
+#define PG_lru 6
+#define PG_active 7
+#define PG_slab 8
+#define PG_skip 10
+#define PG_highmem 11
+#define PG_checked 12 /* kill me in 2.5.<early>. */
+#define PG_arch_1 13
+#define PG_reserved 14
+#define PG_launder 15 /* written out by VM pressure.. */
+#define PG_fs_1 16 /* Filesystem specific */
+#define PG_foreign 21 /* Page belongs to foreign allocator */
+
+#ifndef arch_set_page_uptodate
+#define arch_set_page_uptodate(page)
+#endif
+
+/* Make it prettier to test the above... */
+#define UnlockPage(page) unlock_page(page)
+#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags)
+#define SetPageUptodate(page) \
+ do { \
+ arch_set_page_uptodate(page); \
+ set_bit(PG_uptodate, &(page)->flags); \
+ } while (0)
+#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
+#define PageDirty(page) test_bit(PG_dirty, &(page)->flags)
+#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags)
+#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags)
+#define PageLocked(page) test_bit(PG_locked, &(page)->flags)
+#define LockPage(page) set_bit(PG_locked, &(page)->flags)
+#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags)
+#define PageChecked(page) test_bit(PG_checked, &(page)->flags)
+#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
+#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags)
+#define PageLaunder(page) test_bit(PG_launder, &(page)->flags)
+#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags)
+#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags)
+#define ClearPageArch1(page) clear_bit(PG_arch_1, &(page)->flags)
+
+/* A foreign page uses a custom destructor rather than the buddy allocator. */
+#ifdef CONFIG_FOREIGN_PAGES
+#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
+#define SetPageForeign(page) set_bit(PG_foreign, &(page)->flags)
+#define ClearPageForeign(page) clear_bit(PG_foreign, &(page)->flags)
+#define PageForeignDestructor(page) \
+ ( (void (*) (struct page *)) (page)->mapping )
+#else
+#define PageForeign(page) 0
+#define PageForeignDestructor(page) void
+#endif
+
+/*
+ * The zone field is never updated after free_area_init_core()
+ * sets it, so none of the operations on it need to be atomic.
+ */
+#define NODE_SHIFT 4
+#define ZONE_SHIFT (BITS_PER_LONG - 8)
+
+struct zone_struct;
+extern struct zone_struct *zone_table[];
+
+static inline zone_t *page_zone(struct page *page)
+{
+ return zone_table[page->flags >> ZONE_SHIFT];
+}
+
+static inline void set_page_zone(struct page *page, unsigned long zone_num)
+{
+ page->flags &= ~(~0UL << ZONE_SHIFT);
+ page->flags |= zone_num << ZONE_SHIFT;
+}
+
+/*
+ * In order to avoid #ifdefs within C code itself, we define
+ * set_page_address to a noop for non-highmem machines, where
+ * the field isn't useful.
+ * The same is true for page_address() in arch-dependent code.
+ */
+#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+
+#define set_page_address(page, address) \
+ do { \
+ (page)->virtual = (address); \
+ } while(0)
+
+#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+#define set_page_address(page, address) do { } while(0)
+#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+
+/*
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
+ */
+#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
+
+#define page_address(page) ((page)->virtual)
+
+#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+
+#define page_address(page) \
+ __va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT) \
+ + page_zone(page)->zone_start_paddr)
+
+#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
+
+extern void FASTCALL(set_page_dirty(struct page *));
+
+/*
+ * The first mb is necessary to safely close the critical section opened by the
+ * TryLockPage(), the second mb is necessary to enforce ordering between
+ * the clear_bit and the read of the waitqueue (to avoid SMP races with a
+ * parallel wait_on_page).
+ */
+#define PageError(page) test_bit(PG_error, &(page)->flags)
+#define SetPageError(page) set_bit(PG_error, &(page)->flags)
+#define ClearPageError(page) clear_bit(PG_error, &(page)->flags)
+#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags)
+#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
+#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags)
+#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
+#define PageSlab(page) test_bit(PG_slab, &(page)->flags)
+#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags)
+#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags)
+#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
+
+#define PageActive(page) test_bit(PG_active, &(page)->flags)
+#define SetPageActive(page) set_bit(PG_active, &(page)->flags)
+#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags)
+
+#define PageLRU(page) test_bit(PG_lru, &(page)->flags)
+#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags)
+#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags)
+
+#ifdef CONFIG_HIGHMEM
+#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags)
+#else
+#define PageHighMem(page) 0 /* needed to optimize away at compile time */
+#endif
+
+#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags)
+#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags)
+
+/*
+ * Error return values for the *_nopage functions
+ */
+#define NOPAGE_SIGBUS (NULL)
+#define NOPAGE_OOM ((struct page *) (-1))
+
+/* The array of struct pages */
+extern mem_map_t * mem_map;
+
+/*
+ * There is only one page-allocator function, and two main namespaces to
+ * it. The alloc_page*() variants return 'struct page *' and as such
+ * can allocate highmem pages, the *get*page*() variants return
+ * virtual kernel addresses to the allocated page(s).
+ */
+extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order));
+extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist));
+extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order);
+
+static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
+{
+ /*
+ * Gets optimized away by the compiler.
+ */
+ if (order >= MAX_ORDER)
+ return NULL;
+ return _alloc_pages(gfp_mask, order);
+}
+
+#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+
+extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
+extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
+
+#define __get_free_page(gfp_mask) \
+ __get_free_pages((gfp_mask),0)
+
+#define __get_dma_pages(gfp_mask, order) \
+ __get_free_pages((gfp_mask) | GFP_DMA,(order))
+
+/*
+ * The old interface name will be removed in 2.5:
+ */
+#define get_free_page get_zeroed_page
+
+/*
+ * There is only one 'core' page-freeing function.
+ */
+extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+
+#define __free_page(page) __free_pages((page), 0)
+#define free_page(addr) free_pages((addr),0)
+
+extern void show_free_areas(void);
+extern void show_free_areas_node(pg_data_t *pgdat);
+
+extern void clear_page_tables(struct mm_struct *, unsigned long, int);
+
+extern int fail_writepage(struct page *);
+struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused);
+struct file *shmem_file_setup(char * name, loff_t size);
+extern void shmem_lock(struct file * file, int lock);
+extern int shmem_zero_setup(struct vm_area_struct *);
+
+extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
+extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
+extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
+extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
+
+extern int vmtruncate(struct inode * inode, loff_t offset);
+extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
+extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
+extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int make_pages_present(unsigned long addr, unsigned long end);
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
+extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
+extern int ptrace_attach(struct task_struct *tsk);
+extern int ptrace_detach(struct task_struct *, unsigned int);
+extern void ptrace_disable(struct task_struct *);
+extern int ptrace_check_attach(struct task_struct *task, int kill);
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
+ int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+
+/*
+ * On a two-level page table, this ends up being trivial. Thus the
+ * inlining and the symmetry break with pte_alloc() that does all
+ * of this out-of-line.
+ */
+static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ if (pgd_none(*pgd))
+ return __pmd_alloc(mm, pgd, address);
+ return pmd_offset(pgd, address);
+}
+
+extern int pgt_cache_water[2];
+extern int check_pgt_cache(void);
+
+extern void free_area_init(unsigned long * zones_size);
+extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
+ unsigned long * zones_size, unsigned long zone_start_paddr,
+ unsigned long *zholes_size);
+extern void mem_init(void);
+extern void show_mem(void);
+extern void si_meminfo(struct sysinfo * val);
+extern void swapin_readahead(swp_entry_t);
+
+extern struct address_space swapper_space;
+#define PageSwapCache(page) ((page)->mapping == &swapper_space)
+
+static inline int is_page_cache_freeable(struct page * page)
+{
+ return page_count(page) - !!page->buffers == 1;
+}
+
+extern int FASTCALL(can_share_swap_page(struct page *));
+extern int FASTCALL(remove_exclusive_swap_page(struct page *));
+
+extern void __free_pte(pte_t);
+
+/* mmap.c */
+extern void lock_vma_mappings(struct vm_area_struct *);
+extern void unlock_vma_mappings(struct vm_area_struct *);
+extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+extern void build_mmap_rb(struct mm_struct *);
+extern void exit_mmap(struct mm_struct *);
+
+extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+
+extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flag, unsigned long pgoff);
+
+static inline unsigned long do_mmap(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+ unsigned long flag, unsigned long offset)
+{
+ unsigned long ret = -EINVAL;
+ if ((offset + PAGE_ALIGN(len)) < offset)
+ goto out;
+ if (!(offset & ~PAGE_MASK))
+ ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
+out:
+ return ret;
+}
+
+extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+
+extern unsigned long do_brk(unsigned long, unsigned long);
+
+static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev)
+{
+ prev->vm_next = vma->vm_next;
+ rb_erase(&vma->vm_rb, &mm->mm_rb);
+ if (mm->mmap_cache == vma)
+ mm->mmap_cache = prev;
+}
+
+static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags)
+{
+ if (!vma->vm_file && vma->vm_flags == vm_flags)
+ return 1;
+ else
+ return 0;
+}
+
+struct zone_t;
+/* filemap.c */
+extern void remove_inode_page(struct page *);
+extern unsigned long page_unuse(struct page *);
+extern void truncate_inode_pages(struct address_space *, loff_t);
+
+/* generic vm_area_ops exported for stackable file systems */
+extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned int);
+extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
+
+/*
+ * GFP bitmasks..
+ */
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
+#define __GFP_DMA 0x01
+#define __GFP_HIGHMEM 0x02
+
+/* Action modifiers - doesn't change the zoning */
+#define __GFP_WAIT 0x10 /* Can wait and reschedule? */
+#define __GFP_HIGH 0x20 /* Should access emergency pools? */
+#define __GFP_IO 0x40 /* Can start low memory physical IO? */
+#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */
+#define __GFP_FS 0x100 /* Can call down to low-level FS? */
+
+#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
+#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT)
+#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
+#define GFP_ATOMIC (__GFP_HIGH)
+#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
+#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
+
+/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
+ platforms, used as appropriate on others */
+
+#define GFP_DMA __GFP_DMA
+
+static inline unsigned int pf_gfp_mask(unsigned int gfp_mask)
+{
+ /* avoid all memory balancing I/O methods if this task cannot block on I/O */
+ if (current->flags & PF_NOIO)
+ gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS);
+
+ return gfp_mask;
+}
+
+/* vma is the first one with address < vma->vm_end,
+ * and even address < vma->vm_start. Have to extend vma. */
+static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
+{
+ unsigned long grow;
+
+ /*
+ * vma->vm_start/vm_end cannot change under us because the caller is required
+ * to hold the mmap_sem in write mode. We need to get the spinlock only
+ * before relocating the vma range ourself.
+ */
+ address &= PAGE_MASK;
+ spin_lock(&vma->vm_mm->page_table_lock);
+ grow = (vma->vm_start - address) >> PAGE_SHIFT;
+ if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
+ ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) {
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ return -ENOMEM;
+ }
+ vma->vm_start = address;
+ vma->vm_pgoff -= grow;
+ vma->vm_mm->total_vm += grow;
+ if (vma->vm_flags & VM_LOCKED)
+ vma->vm_mm->locked_vm += grow;
+ spin_unlock(&vma->vm_mm->page_table_lock);
+ return 0;
+}
+
+/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
+extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
+extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
+ struct vm_area_struct **pprev);
+
+/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
+ NULL if none. Assume start_addr < end_addr. */
+static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+{
+ struct vm_area_struct * vma = find_vma(mm,start_addr);
+
+ if (vma && end_addr <= vma->vm_start)
+ vma = NULL;
+ return vma;
+}
+
+extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+
+extern struct page * vmalloc_to_page(void *addr);
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/linux-2.4.26-xen-sparse/mm/page_alloc.c b/linux-2.4.26-xen-sparse/mm/page_alloc.c
index 62ed7751a5..fda37e1929 100644
--- a/linux-2.4.26-xen-sparse/mm/page_alloc.c
+++ b/linux-2.4.26-xen-sparse/mm/page_alloc.c
@@ -89,6 +89,9 @@ static void __free_pages_ok (struct page *page, unsigned int order)
struct page *base;
zone_t *zone;
+ if (PageForeign(page))
+ return (PageForeignDestructor(page))(page);
+
/*
* Yes, think what happens when other parts of the kernel take
* a reference to a page in order to pin it for io. -ben
@@ -102,7 +105,7 @@ static void __free_pages_ok (struct page *page, unsigned int order)
if (page->buffers)
BUG();
if (page->mapping)
- return (*(void(*)(struct page *))page->mapping)(page);
+ BUG();
if (!VALID_PAGE(page))
BUG();
if (PageLocked(page))
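
The hunk above also replaces the old implicit convention, under which any non-NULL page->mapping on a freed page was treated as a destructor, with an explicit PG_foreign test; a stray mapping pointer now hits BUG() just as it does in stock kernels. As a rough sketch of the contract a destructor has to meet under this scheme (the function name is hypothetical; compare netif_page_release() later in this patch):

    /* Called from __free_pages_ok() once the page's use count has reached zero.
     * The callback owns the page from this point on and typically re-arms the
     * count before recycling the page into the driver's private pool. */
    static void example_page_release(struct page *page)
    {
        set_page_count(page, 1);  /* ready for the next use */
        /* ... return the page to the driver's own free list ... */
    }
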
diff --git a/linux-2.6.7-xen-sparse/arch/xen/Kconfig b/linux-2.6.7-xen-sparse/arch/xen/Kconfig
index dabb558f2e..9f4c1d8002 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/Kconfig
+++ b/linux-2.6.7-xen-sparse/arch/xen/Kconfig
@@ -44,6 +44,16 @@ config XEN_WRITABLE_PAGETABLES
endmenu
+config FOREIGN_PAGES
+ bool
+ default y if XEN_PHYSDEV_ACCESS
+ default n if !XEN_PHYSDEV_ACCESS
+
+config PAGESIZED_SKBS
+ bool
+ default y if XEN_PHYSDEV_ACCESS
+ default n if !XEN_PHYSDEV_ACCESS
+
#config VT
# bool
# default y
diff --git a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig
index ad6c744ebf..bda8feb206 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig
+++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig
@@ -10,6 +10,8 @@ CONFIG_NO_IDLE_HZ=y
#
CONFIG_XEN_PRIVILEGED_GUEST=y
CONFIG_XEN_PHYSDEV_ACCESS=y
+CONFIG_FOREIGN_PAGES=y
+CONFIG_PAGESIZED_SKBS=y
CONFIG_X86=y
# CONFIG_X86_64 is not set
diff --git a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig
index a56fc3f7d4..a8cc83f752 100644
--- a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig
+++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig
@@ -10,6 +10,8 @@ CONFIG_NO_IDLE_HZ=y
#
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
+# CONFIG_FOREIGN_PAGES is not set
+# CONFIG_PAGESIZED_SKBS is not set
CONFIG_X86=y
# CONFIG_X86_64 is not set
diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
index 009012c9f6..a28115fe0c 100644
--- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c
@@ -376,7 +376,6 @@ static void net_tx_action(unsigned long unused)
netif_tx_request_t txreq;
u16 pending_idx;
NETIF_RING_IDX i;
- struct page *page;
multicall_entry_t *mcl;
PEND_RING_IDX dc, dp;
@@ -567,10 +566,9 @@ static void net_tx_action(unsigned long unused)
(void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
PKT_PROT_LEN);
- page = virt_to_page(MMAP_VADDR(pending_idx));
-
/* Append the packet payload as a fragment. */
- skb_shinfo(skb)->frags[0].page = page;
+ skb_shinfo(skb)->frags[0].page =
+ virt_to_page(MMAP_VADDR(pending_idx));
skb_shinfo(skb)->frags[0].size = txreq.size - PKT_PROT_LEN;
skb_shinfo(skb)->frags[0].page_offset =
(txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
@@ -581,17 +579,6 @@ static void net_tx_action(unsigned long unused)
skb->dev = netif->dev;
skb->protocol = eth_type_trans(skb, skb->dev);
- /*
- * Destructor information. We hideously abuse the 'mapping' pointer,
- * which isn't otherwise used by us. The page deallocator is modified
- * to interpret a non-NULL value as a destructor function to be called.
- * This works okay because in all other cases the pointer must be NULL
- * when the page is freed (normally Linux will explicitly bug out if
- * it sees otherwise.
- */
- page->mapping = (struct address_space *)netif_page_release;
- set_page_count(page, 1);
-
netif->stats.tx_bytes += txreq.size;
netif->stats.tx_packets++;
@@ -607,8 +594,8 @@ static void netif_page_release(struct page *page)
unsigned long flags;
u16 pending_idx = page - virt_to_page(mmap_vstart);
- /* Stop the abuse. */
- page->mapping = NULL;
+ /* Ready for next use. */
+ set_page_count(page, 1);
spin_lock_irqsave(&dealloc_lock, flags);
dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
@@ -742,6 +729,7 @@ static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
static int __init netback_init(void)
{
int i;
+ struct page *page;
if ( !(start_info.flags & SIF_NET_BE_DOMAIN) &&
!(start_info.flags & SIF_INITDOMAIN) )
@@ -757,6 +745,13 @@ static int __init netback_init(void)
if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
BUG();
+ for ( i = 0; i < MAX_PENDING_REQS; i++ )
+ {
+ page = virt_to_page(MMAP_VADDR(i));
+ SetPageForeign(page);
+ PageForeignDestructor(page) = netif_page_release;
+ }
+
pending_cons = 0;
pending_prod = MAX_PENDING_REQS;
for ( i = 0; i < MAX_PENDING_REQS; i++ )
diff --git a/linux-2.6.7-xen-sparse/include/linux/page-flags.h b/linux-2.6.7-xen-sparse/include/linux/page-flags.h
new file mode 100644
index 0000000000..951844c30e
--- /dev/null
+++ b/linux-2.6.7-xen-sparse/include/linux/page-flags.h
@@ -0,0 +1,343 @@
+/*
+ * Macros for manipulating and testing page->flags
+ */
+
+#ifndef PAGE_FLAGS_H
+#define PAGE_FLAGS_H
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+#include <asm/pgtable.h>
+
+/*
+ * Various page->flags bits:
+ *
+ * PG_reserved is set for special pages, which can never be swapped out. Some
+ * of them might not even exist (eg empty_bad_page)...
+ *
+ * The PG_private bitflag is set if page->private contains a valid value.
+ *
+ * During disk I/O, PG_locked is used. This bit is set before I/O and
+ * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
+ * waiting for the I/O on this page to complete.
+ *
+ * PG_uptodate tells whether the page's contents is valid. When a read
+ * completes, the page becomes uptodate, unless a disk I/O error happened.
+ *
+ * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
+ * which is set any time the system accesses that page through the (mapping,
+ * index) hash table. This referenced bit, together with the referenced bit
+ * in the page tables, is used to manipulate page->age and move the page across
+ * the active, inactive_dirty and inactive_clean lists.
+ *
+ * Note that the referenced bit, the page->lru list_head and the active,
+ * inactive_dirty and inactive_clean lists are protected by the
+ * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ *
+ * PG_error is set to indicate that an I/O error occurred on this page.
+ *
+ * PG_arch_1 is an architecture specific page state bit. The generic code
+ * guarantees that this bit is cleared for a page when it first is entered into
+ * the page cache.
+ *
+ * PG_highmem pages are not permanently mapped into the kernel virtual address
+ * space, they need to be kmapped separately for doing IO on the pages. The
+ * struct page (these bits with information) are always mapped into kernel
+ * address space...
+ */
+
+/*
+ * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break
+ * locked- and dirty-page accounting. The top eight bits of page->flags are
+ * used for page->zone, so putting flag bits there doesn't work.
+ */
+#define PG_locked 0 /* Page is locked. Don't touch. */
+#define PG_error 1
+#define PG_referenced 2
+#define PG_uptodate 3
+
+#define PG_dirty 4
+#define PG_lru 5
+#define PG_active 6
+#define PG_slab 7 /* slab debug (Suparna wants this) */
+
+#define PG_highmem 8
+#define PG_checked 9 /* kill me in 2.5.<early>. */
+#define PG_arch_1 10
+#define PG_reserved 11
+
+#define PG_private 12 /* Has something at ->private */
+#define PG_writeback 13 /* Page is under writeback */
+#define PG_nosave 14 /* Used for system suspend/resume */
+#define PG_maplock 15 /* Lock bit for rmap to ptes */
+
+#define PG_swapcache 16 /* Swap page: swp_entry_t in private */
+#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */
+#define PG_reclaim 18 /* To be reclaimed asap */
+#define PG_compound 19 /* Part of a compound page */
+
+#define PG_anon 20 /* Anonymous: anon_vma in mapping */
+#define PG_foreign 21 /* Page belongs to foreign allocator */
+
+
+/*
+ * Global page accounting. One instance per CPU. Only unsigned longs are
+ * allowed.
+ */
+struct page_state {
+ unsigned long nr_dirty; /* Dirty writeable pages */
+ unsigned long nr_writeback; /* Pages under writeback */
+ unsigned long nr_unstable; /* NFS unstable pages */
+ unsigned long nr_page_table_pages;/* Pages used for pagetables */
+ unsigned long nr_mapped; /* mapped into pagetables */
+ unsigned long nr_slab; /* In slab */
+#define GET_PAGE_STATE_LAST nr_slab
+
+ /*
+ * The below are zeroed by get_page_state(). Use get_full_page_state()
+ * to add up all these.
+ */
+ unsigned long pgpgin; /* Disk reads */
+ unsigned long pgpgout; /* Disk writes */
+ unsigned long pswpin; /* swap reads */
+ unsigned long pswpout; /* swap writes */
+ unsigned long pgalloc_high; /* page allocations */
+
+ unsigned long pgalloc_normal;
+ unsigned long pgalloc_dma;
+ unsigned long pgfree; /* page freeings */
+ unsigned long pgactivate; /* pages moved inactive->active */
+ unsigned long pgdeactivate; /* pages moved active->inactive */
+
+ unsigned long pgfault; /* faults (major+minor) */
+ unsigned long pgmajfault; /* faults (major only) */
+ unsigned long pgrefill_high; /* inspected in refill_inactive_zone */
+ unsigned long pgrefill_normal;
+ unsigned long pgrefill_dma;
+
+ unsigned long pgsteal_high; /* total highmem pages reclaimed */
+ unsigned long pgsteal_normal;
+ unsigned long pgsteal_dma;
+ unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
+ unsigned long pgscan_kswapd_normal;
+
+ unsigned long pgscan_kswapd_dma;
+ unsigned long pgscan_direct_high;/* total highmem pages scanned */
+ unsigned long pgscan_direct_normal;
+ unsigned long pgscan_direct_dma;
+ unsigned long pginodesteal; /* pages reclaimed via inode freeing */
+
+ unsigned long slabs_scanned; /* slab objects scanned */
+ unsigned long kswapd_steal; /* pages reclaimed by kswapd */
+ unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
+ unsigned long pageoutrun; /* kswapd's calls to page reclaim */
+ unsigned long allocstall; /* direct reclaim calls */
+
+ unsigned long pgrotated; /* pages rotated to tail of the LRU */
+};
+
+DECLARE_PER_CPU(struct page_state, page_states);
+
+extern void get_page_state(struct page_state *ret);
+extern void get_full_page_state(struct page_state *ret);
+extern unsigned long __read_page_state(unsigned offset);
+
+#define read_page_state(member) \
+ __read_page_state(offsetof(struct page_state, member))
+
+#define mod_page_state(member, delta) \
+ do { \
+ unsigned long flags; \
+ local_irq_save(flags); \
+ __get_cpu_var(page_states).member += (delta); \
+ local_irq_restore(flags); \
+ } while (0)
+
+
+#define inc_page_state(member) mod_page_state(member, 1UL)
+#define dec_page_state(member) mod_page_state(member, 0UL - 1)
+#define add_page_state(member,delta) mod_page_state(member, (delta))
+#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
+
+#define mod_page_state_zone(zone, member, delta) \
+ do { \
+ unsigned long flags; \
+ local_irq_save(flags); \
+ if (is_highmem(zone)) \
+ __get_cpu_var(page_states).member##_high += (delta);\
+ else if (is_normal(zone)) \
+ __get_cpu_var(page_states).member##_normal += (delta);\
+ else \
+ __get_cpu_var(page_states).member##_dma += (delta);\
+ local_irq_restore(flags); \
+ } while (0)
+
+/*
+ * Manipulation of page state flags
+ */
+#define PageLocked(page) \
+ test_bit(PG_locked, &(page)->flags)
+#define SetPageLocked(page) \
+ set_bit(PG_locked, &(page)->flags)
+#define TestSetPageLocked(page) \
+ test_and_set_bit(PG_locked, &(page)->flags)
+#define ClearPageLocked(page) \
+ clear_bit(PG_locked, &(page)->flags)
+#define TestClearPageLocked(page) \
+ test_and_clear_bit(PG_locked, &(page)->flags)
+
+#define PageError(page) test_bit(PG_error, &(page)->flags)
+#define SetPageError(page) set_bit(PG_error, &(page)->flags)
+#define ClearPageError(page) clear_bit(PG_error, &(page)->flags)
+
+#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags)
+#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
+#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags)
+#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
+
+#ifndef arch_set_page_uptodate
+#define arch_set_page_uptodate(page) do { } while (0)
+#endif
+
+#define PageUptodate(page) test_bit(PG_uptodate, &(page)->flags)
+#define SetPageUptodate(page) \
+ do { \
+ arch_set_page_uptodate(page); \
+ set_bit(PG_uptodate, &(page)->flags); \
+ } while (0)
+#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
+
+#define PageDirty(page) test_bit(PG_dirty, &(page)->flags)
+#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags)
+#define TestSetPageDirty(page) test_and_set_bit(PG_dirty, &(page)->flags)
+#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags)
+#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
+
+#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags)
+#define PageLRU(page) test_bit(PG_lru, &(page)->flags)
+#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags)
+#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags)
+
+#define PageActive(page) test_bit(PG_active, &(page)->flags)
+#define SetPageActive(page) set_bit(PG_active, &(page)->flags)
+#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags)
+#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
+#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
+
+#define PageSlab(page) test_bit(PG_slab, &(page)->flags)
+#define SetPageSlab(page) set_bit(PG_slab, &(page)->flags)
+#define ClearPageSlab(page) clear_bit(PG_slab, &(page)->flags)
+#define TestClearPageSlab(page) test_and_clear_bit(PG_slab, &(page)->flags)
+#define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags)
+
+#ifdef CONFIG_HIGHMEM
+#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags)
+#else
+#define PageHighMem(page) 0 /* needed to optimize away at compile time */
+#endif
+
+#define PageChecked(page) test_bit(PG_checked, &(page)->flags)
+#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
+#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags)
+
+#define PageReserved(page) test_bit(PG_reserved, &(page)->flags)
+#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags)
+#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags)
+
+#define SetPagePrivate(page) set_bit(PG_private, &(page)->flags)
+#define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags)
+#define PagePrivate(page) test_bit(PG_private, &(page)->flags)
+
+#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags)
+#define SetPageWriteback(page) \
+ do { \
+ if (!test_and_set_bit(PG_writeback, \
+ &(page)->flags)) \
+ inc_page_state(nr_writeback); \
+ } while (0)
+#define TestSetPageWriteback(page) \
+ ({ \
+ int ret; \
+ ret = test_and_set_bit(PG_writeback, \
+ &(page)->flags); \
+ if (!ret) \
+ inc_page_state(nr_writeback); \
+ ret; \
+ })
+#define ClearPageWriteback(page) \
+ do { \
+ if (test_and_clear_bit(PG_writeback, \
+ &(page)->flags)) \
+ dec_page_state(nr_writeback); \
+ } while (0)
+#define TestClearPageWriteback(page) \
+ ({ \
+ int ret; \
+ ret = test_and_clear_bit(PG_writeback, \
+ &(page)->flags); \
+ if (ret) \
+ dec_page_state(nr_writeback); \
+ ret; \
+ })
+
+#define PageNosave(page) test_bit(PG_nosave, &(page)->flags)
+#define SetPageNosave(page) set_bit(PG_nosave, &(page)->flags)
+#define TestSetPageNosave(page) test_and_set_bit(PG_nosave, &(page)->flags)
+#define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags)
+#define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags)
+
+#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
+#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
+#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
+
+#define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags)
+#define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags)
+#define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags)
+#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
+
+#define PageCompound(page) test_bit(PG_compound, &(page)->flags)
+#define SetPageCompound(page) set_bit(PG_compound, &(page)->flags)
+#define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags)
+
+#define PageAnon(page) test_bit(PG_anon, &(page)->flags)
+#define SetPageAnon(page) set_bit(PG_anon, &(page)->flags)
+#define ClearPageAnon(page) clear_bit(PG_anon, &(page)->flags)
+
+/* A foreign page uses a custom destructor rather than the buddy allocator. */
+#ifdef CONFIG_FOREIGN_PAGES
+#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
+#define SetPageForeign(page) set_bit(PG_foreign, &(page)->flags)
+#define ClearPageForeign(page) clear_bit(PG_foreign, &(page)->flags)
+#define PageForeignDestructor(page) \
+ ( (void (*) (struct page *)) (page)->mapping )
+#else
+#define PageForeign(page) 0
+#define PageForeignDestructor(page) void
+#endif
+
+#ifdef CONFIG_SWAP
+#define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags)
+#define SetPageSwapCache(page) set_bit(PG_swapcache, &(page)->flags)
+#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags)
+#else
+#define PageSwapCache(page) 0
+#endif
+
+struct page; /* forward declaration */
+
+int test_clear_page_dirty(struct page *page);
+int __clear_page_dirty(struct page *page);
+int test_clear_page_writeback(struct page *page);
+int test_set_page_writeback(struct page *page);
+
+static inline void clear_page_dirty(struct page *page)
+{
+ test_clear_page_dirty(page);
+}
+
+static inline void set_page_writeback(struct page *page)
+{
+ test_set_page_writeback(page);
+}
+
+#endif /* PAGE_FLAGS_H */
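
A note on the fallback definitions above: with CONFIG_FOREIGN_PAGES disabled, PageForeign() is the constant 0 and PageForeignDestructor() expands to plain void, so a call through it degenerates to a (void) cast and a guarded call site compiles away entirely. A hedged sketch of the intended call pattern (the helper name is made up for illustration):

    #include <linux/mm.h>  /* pulls in page-flags.h on 2.6 */

    /* Returns 1 if the page was handed back to its foreign owner, 0 if the
     * caller should fall through to the normal buddy-allocator free path.
     * With CONFIG_FOREIGN_PAGES=n the branch below is dead code. */
    static inline int maybe_free_foreign(struct page *page)
    {
        if (PageForeign(page)) {
            (PageForeignDestructor(page))(page);
            return 1;
        }
        return 0;
    }
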
diff --git a/linux-2.6.7-xen-sparse/include/linux/skbuff.h b/linux-2.6.7-xen-sparse/include/linux/skbuff.h
new file mode 100644
index 0000000000..37c4342b9b
--- /dev/null
+++ b/linux-2.6.7-xen-sparse/include/linux/skbuff.h
@@ -0,0 +1,1073 @@
+/*
+ * Definitions for the 'struct sk_buff' memory handlers.
+ *
+ * Authors:
+ * Alan Cox, <gw4pts@gw4pts.ampr.org>
+ * Florian La Roche, <rzsfl@rz.uni-sb.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_SKBUFF_H
+#define _LINUX_SKBUFF_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/time.h>
+#include <linux/cache.h>
+
+#include <asm/atomic.h>
+#include <asm/types.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/poll.h>
+#include <linux/net.h>
+
+#define HAVE_ALLOC_SKB /* For the drivers to know */
+#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
+#define SLAB_SKB /* Slabified skbuffs */
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_HW 1
+#define CHECKSUM_UNNECESSARY 2
+
+#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
+ ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \
+ sizeof(struct skb_shared_info)) & \
+ ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0))
+#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2))
+
+/* A. Checksumming of received packets by device.
+ *
+ * NONE: device failed to checksum this packet.
+ * skb->csum is undefined.
+ *
+ * UNNECESSARY: device parsed packet and wouldbe verified checksum.
+ * skb->csum is undefined.
+ * It is bad option, but, unfortunately, many of vendors do this.
+ * Apparently with secret goal to sell you new device, when you
+ * will add new protocol to your host. F.e. IPv6. 8)
+ *
+ * HW: the most generic way. Device supplied checksum of _all_
+ * the packet as seen by netif_rx in skb->csum.
+ * NOTE: Even if device supports only some protocols, but
+ * is able to produce some skb->csum, it MUST use HW,
+ * not UNNECESSARY.
+ *
+ * B. Checksumming on output.
+ *
+ * NONE: skb is checksummed by protocol or csum is not required.
+ *
+ * HW: device is required to csum packet as seen by hard_start_xmit
+ * from skb->h.raw to the end and to record the checksum
+ * at skb->h.raw+skb->csum.
+ *
+ * Device must show its capabilities in dev->features, set
+ * at device setup time.
+ * NETIF_F_HW_CSUM - it is clever device, it is able to checksum
+ * everything.
+ * NETIF_F_NO_CSUM - loopback or reliable single hop media.
+ * NETIF_F_IP_CSUM - device is dumb. It is able to csum only
+ * TCP/UDP over IPv4. Sigh. Vendors like this
+ * way by an unknown reason. Though, see comment above
+ * about CHECKSUM_UNNECESSARY. 8)
+ *
+ * Any questions? No questions, good. --ANK
+ */
+
+#ifdef __i386__
+#define NET_CALLER(arg) (*(((void **)&arg) - 1))
+#else
+#define NET_CALLER(arg) __builtin_return_address(0)
+#endif
+
+#ifdef CONFIG_NETFILTER
+struct nf_conntrack {
+ atomic_t use;
+ void (*destroy)(struct nf_conntrack *);
+};
+
+struct nf_ct_info {
+ struct nf_conntrack *master;
+};
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+struct nf_bridge_info {
+ atomic_t use;
+ struct net_device *physindev;
+ struct net_device *physoutdev;
+#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+ struct net_device *netoutdev;
+#endif
+ unsigned int mask;
+ unsigned long data[32 / sizeof(unsigned long)];
+};
+#endif
+
+#endif
+
+struct sk_buff_head {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ __u32 qlen;
+ spinlock_t lock;
+};
+
+struct sk_buff;
+
+/* To allow 64K frame to be packed as single skb without frag_list */
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
+
+typedef struct skb_frag_struct skb_frag_t;
+
+struct skb_frag_struct {
+ struct page *page;
+ __u16 page_offset;
+ __u16 size;
+};
+
+/* This data is invariant across clones and lives at
+ * the end of the header data, ie. at skb->end.
+ */
+struct skb_shared_info {
+ atomic_t dataref;
+ unsigned int nr_frags;
+ unsigned short tso_size;
+ unsigned short tso_segs;
+ struct sk_buff *frag_list;
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+/**
+ * struct sk_buff - socket buffer
+ * @next: Next buffer in list
+ * @prev: Previous buffer in list
+ * @list: List we are on
+ * @sk: Socket we are owned by
+ * @stamp: Time we arrived
+ * @dev: Device we arrived on/are leaving by
+ * @real_dev: The real device we are using
+ * @h: Transport layer header
+ * @nh: Network layer header
+ * @mac: Link layer header
+ * @dst: FIXME: Describe this field
+ * @cb: Control buffer. Free for use by every layer. Put private vars here
+ * @len: Length of actual data
+ * @data_len: Data length
+ * @mac_len: Length of link layer header
+ * @csum: Checksum
+ * @__unused: Dead field, may be reused
+ * @cloned: Head may be cloned (check refcnt to be sure)
+ * @pkt_type: Packet class
+ * @ip_summed: Driver fed us an IP checksum
+ * @priority: Packet queueing priority
+ * @users: User count - see {datagram,tcp}.c
+ * @protocol: Packet protocol from driver
+ * @security: Security level of packet
+ * @truesize: Buffer size
+ * @head: Head of buffer
+ * @data: Data head pointer
+ * @tail: Tail pointer
+ * @end: End pointer
+ * @destructor: Destruct function
+ * @nfmark: Can be used for communication between hooks
+ * @nfcache: Cache info
+ * @nfct: Associated connection, if any
+ * @nf_debug: Netfilter debugging
+ * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
+ * @private: Data which is private to the HIPPI implementation
+ * @tc_index: Traffic control index
+ */
+
+struct sk_buff {
+ /* These two members must be first. */
+ struct sk_buff *next;
+ struct sk_buff *prev;
+
+ struct sk_buff_head *list;
+ struct sock *sk;
+ struct timeval stamp;
+ struct net_device *dev;
+ struct net_device *real_dev;
+
+ union {
+ struct tcphdr *th;
+ struct udphdr *uh;
+ struct icmphdr *icmph;
+ struct igmphdr *igmph;
+ struct iphdr *ipiph;
+ struct ipv6hdr *ipv6h;
+ unsigned char *raw;
+ } h;
+
+ union {
+ struct iphdr *iph;
+ struct ipv6hdr *ipv6h;
+ struct arphdr *arph;
+ unsigned char *raw;
+ } nh;
+
+ union {
+ struct ethhdr *ethernet;
+ unsigned char *raw;
+ } mac;
+
+ struct dst_entry *dst;
+ struct sec_path *sp;
+
+ /*
+ * This is the control buffer. It is free to use for every
+ * layer. Please put your private variables there. If you
+ * want to keep them across layers you have to do a skb_clone()
+ * first. This is owned by whoever has the skb queued ATM.
+ */
+ char cb[48];
+
+ unsigned int len,
+ data_len,
+ mac_len,
+ csum;
+ unsigned char local_df,
+ cloned,
+ pkt_type,
+ ip_summed;
+ __u32 priority;
+ unsigned short protocol,
+ security;
+
+ void (*destructor)(struct sk_buff *skb);
+#ifdef CONFIG_NETFILTER
+ unsigned long nfmark;
+ __u32 nfcache;
+ struct nf_ct_info *nfct;
+#ifdef CONFIG_NETFILTER_DEBUG
+ unsigned int nf_debug;
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+ struct nf_bridge_info *nf_bridge;
+#endif
+#endif /* CONFIG_NETFILTER */
+#if defined(CONFIG_HIPPI)
+ union {
+ __u32 ifield;
+ } private;
+#endif
+#ifdef CONFIG_NET_SCHED
+ __u32 tc_index; /* traffic control index */
+#endif
+
+ /* These elements must be at the end, see alloc_skb() for details. */
+ unsigned int truesize;
+ atomic_t users;
+ unsigned char *head,
+ *data,
+ *tail,
+ *end;
+};
+
+#ifdef __KERNEL__
+/*
+ * Handling routines are only of interest to the kernel
+ */
+#include <linux/slab.h>
+
+#include <asm/system.h>
+
+extern void __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *alloc_skb(unsigned int size, int priority);
+extern void kfree_skbmem(struct sk_buff *skb);
+extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
+extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
+extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask);
+extern int pskb_expand_head(struct sk_buff *skb,
+ int nhead, int ntail, int gfp_mask);
+extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
+ unsigned int headroom);
+extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
+ int newheadroom, int newtailroom,
+ int priority);
+extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad);
+#define dev_kfree_skb(a) kfree_skb(a)
+extern void skb_over_panic(struct sk_buff *skb, int len,
+ void *here);
+extern void skb_under_panic(struct sk_buff *skb, int len,
+ void *here);
+
+/* Internal */
+#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end))
+
+/**
+ * skb_queue_empty - check if a queue is empty
+ * @list: queue head
+ *
+ * Returns true if the queue is empty, false otherwise.
+ */
+static inline int skb_queue_empty(const struct sk_buff_head *list)
+{
+ return list->next == (struct sk_buff *)list;
+}
+
+/**
+ * skb_get - reference buffer
+ * @skb: buffer to reference
+ *
+ * Makes another reference to a socket buffer and returns a pointer
+ * to the buffer.
+ */
+static inline struct sk_buff *skb_get(struct sk_buff *skb)
+{
+ atomic_inc(&skb->users);
+ return skb;
+}
+
+/*
+ * If users == 1, we are the only owner and can avoid a redundant
+ * atomic operation.
+ */
+
+/**
+ * kfree_skb - free an sk_buff
+ * @skb: buffer to free
+ *
+ * Drop a reference to the buffer and free it if the usage count has
+ * hit zero.
+ */
+static inline void kfree_skb(struct sk_buff *skb)
+{
+ if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+ __kfree_skb(skb);
+}
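+/*
+ * Example (editorial sketch, not part of the original change): holding an
+ * extra reference across a callback.  "my_log_packet" is a hypothetical
+ * helper that consumes one reference of its own.
+ *
+ *	skb_get(skb);		take a second reference
+ *	my_log_packet(skb);	may queue or free its reference
+ *	kfree_skb(skb);		drop ours; the skb is freed only at refcount 0
+ */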
+
+/* Use this if you didn't touch the skb state [for fast switching] */
+static inline void kfree_skb_fast(struct sk_buff *skb)
+{
+ if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
+ kfree_skbmem(skb);
+}
+
+/**
+ * skb_cloned - is the buffer a clone
+ * @skb: buffer to check
+ *
+ * Returns true if the buffer was generated with skb_clone() and is
+ * one of multiple shared copies of the buffer. Cloned buffers are
+ * shared data so must not be written to under normal circumstances.
+ */
+static inline int skb_cloned(const struct sk_buff *skb)
+{
+ return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
+}
+
+/**
+ * skb_shared - is the buffer shared
+ * @skb: buffer to check
+ *
+ * Returns true if more than one person has a reference to this
+ * buffer.
+ */
+static inline int skb_shared(const struct sk_buff *skb)
+{
+ return atomic_read(&skb->users) != 1;
+}
+
+/**
+ * skb_share_check - check if buffer is shared and if so clone it
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the buffer is shared the buffer is cloned and the old copy
+ * drops a reference. A new clone with a single reference is returned.
+ * If the buffer is not shared the original buffer is returned. When
+ * called from interrupt context or with spinlocks held, @pri must
+ * be %GFP_ATOMIC.
+ *
+ * NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
+{
+ might_sleep_if(pri & __GFP_WAIT);
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, pri);
+ kfree_skb(skb);
+ skb = nskb;
+ }
+ return skb;
+}
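+/*
+ * Example (editorial sketch, not part of the original change): a protocol
+ * handler that intends to modify the skb first makes sure it holds the only
+ * reference.  "my_rcv" and "my_process" are hypothetical.
+ *
+ *	static int my_rcv(struct sk_buff *skb)
+ *	{
+ *		skb = skb_share_check(skb, GFP_ATOMIC);
+ *		if (skb == NULL)
+ *			return -ENOMEM;	clone failed; original already freed
+ *		my_process(skb);
+ *		return 0;
+ *	}
+ */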
+
+/*
+ * Copy shared buffers into a new sk_buff. We effectively do COW on
+ * packets to handle cases where we have both a local reader and a
+ * forwarding path, and a couple of other messy ones. The typical case is
+ * tcpdumping a packet that is being forwarded.
+ */
+
+/**
+ * skb_unshare - make a copy of a shared buffer
+ * @skb: buffer to check
+ * @pri: priority for memory allocation
+ *
+ * If the socket buffer is a clone then this function creates a new
+ * copy of the data, drops a reference count on the old copy and returns
+ * the new copy with the reference count at 1. If the buffer is not a clone
+ * the original buffer is returned. When called with a spinlock held or
+ * from interrupt context, @pri must be %GFP_ATOMIC.
+ *
+ * %NULL is returned on a memory allocation failure.
+ */
+static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
+{
+ might_sleep_if(pri & __GFP_WAIT);
+ if (skb_cloned(skb)) {
+ struct sk_buff *nskb = skb_copy(skb, pri);
+ kfree_skb(skb); /* Free our shared copy */
+ skb = nskb;
+ }
+ return skb;
+}
+
+/**
+ * skb_peek
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the head element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->next;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_peek_tail
+ * @list_: list to peek at
+ *
+ * Peek an &sk_buff. Unlike most other operations you _MUST_
+ * be careful with this one. A peek leaves the buffer on the
+ * list and someone else may run off with it. You must hold
+ * the appropriate locks or have a private queue to do this.
+ *
+ * Returns %NULL for an empty list or a pointer to the tail element.
+ * The reference count is not incremented and the reference is therefore
+ * volatile. Use with caution.
+ */
+static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
+{
+ struct sk_buff *list = ((struct sk_buff *)list_)->prev;
+ if (list == (struct sk_buff *)list_)
+ list = NULL;
+ return list;
+}
+
+/**
+ * skb_queue_len - get queue length
+ * @list_: list to measure
+ *
+ * Return the length of an &sk_buff queue.
+ */
+static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
+{
+ return list_->qlen;
+}
+
+static inline void skb_queue_head_init(struct sk_buff_head *list)
+{
+ spin_lock_init(&list->lock);
+ list->prev = list->next = (struct sk_buff *)list;
+ list->qlen = 0;
+}
+
+/*
+ * Insert an sk_buff at the start of a list.
+ *
+ * The "__skb_xxxx()" functions are the non-atomic ones that
+ * can only be called with interrupts disabled.
+ */
+
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_head(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ prev = (struct sk_buff *)list;
+ next = prev->next;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+}
+
+/**
+ * __skb_queue_tail - queue a buffer at the list tail
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the end of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_tail(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ struct sk_buff *prev, *next;
+
+ newsk->list = list;
+ list->qlen++;
+ next = (struct sk_buff *)list;
+ prev = next->prev;
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+}
+
+
+/**
+ * __skb_dequeue - remove from the head of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the head of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The head item is
+ * returned or %NULL if the list is empty.
+ */
+extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
+static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev, *result;
+
+ prev = (struct sk_buff *) list;
+ next = prev->next;
+ result = NULL;
+ if (next != prev) {
+ result = next;
+ next = next->next;
+ list->qlen--;
+ next->prev = prev;
+ prev->next = next;
+ result->next = result->prev = NULL;
+ result->list = NULL;
+ }
+ return result;
+}
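+/*
+ * Example (editorial sketch, not part of the original change): the locked
+ * wrappers pair naturally as a producer/consumer queue.  "rxq" is a
+ * hypothetical driver-private queue.
+ *
+ *	static struct sk_buff_head rxq;
+ *
+ *	skb_queue_head_init(&rxq);
+ *	skb_queue_tail(&rxq, skb);		producer (takes rxq.lock)
+ *	...
+ *	while ((skb = skb_dequeue(&rxq)) != NULL)
+ *		netif_rx(skb);			consumer
+ */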
+
+
+/*
+ * Insert a packet on a list.
+ */
+extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk);
+static inline void __skb_insert(struct sk_buff *newsk,
+ struct sk_buff *prev, struct sk_buff *next,
+ struct sk_buff_head *list)
+{
+ newsk->next = next;
+ newsk->prev = prev;
+ next->prev = prev->next = newsk;
+ newsk->list = list;
+ list->qlen++;
+}
+
+/*
+ * Place a packet after a given packet in a list.
+ */
+extern void skb_append(struct sk_buff *old, struct sk_buff *newsk);
+static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
+{
+ __skb_insert(newsk, old, old->next, old->list);
+}
+
+/*
+ * Remove an sk_buff from a list. _Must_ be called atomically, and with
+ * the list known.
+ */
+extern void skb_unlink(struct sk_buff *skb);
+static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
+{
+ struct sk_buff *next, *prev;
+
+ list->qlen--;
+ next = skb->next;
+ prev = skb->prev;
+ skb->next = skb->prev = NULL;
+ skb->list = NULL;
+ next->prev = prev;
+ prev->next = next;
+}
+
+
+/* XXX: more streamlined implementation */
+
+/**
+ * __skb_dequeue_tail - remove from the tail of the queue
+ * @list: list to dequeue from
+ *
+ * Remove the tail of the list. This function does not take any locks
+ * so must be used with appropriate locks held only. The tail item is
+ * returned or %NULL if the list is empty.
+ */
+extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
+static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
+{
+ struct sk_buff *skb = skb_peek_tail(list);
+ if (skb)
+ __skb_unlink(skb, list);
+ return skb;
+}
+
+
+static inline int skb_is_nonlinear(const struct sk_buff *skb)
+{
+ return skb->data_len;
+}
+
+static inline unsigned int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+static inline int skb_pagelen(const struct sk_buff *skb)
+{
+ int i, len = 0;
+
+ for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
+ len += skb_shinfo(skb)->frags[i].size;
+ return len + skb_headlen(skb);
+}
+
+static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ frag->page = page;
+ frag->page_offset = off;
+ frag->size = size;
+ skb_shinfo(skb)->nr_frags = i+1;
+}
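+/*
+ * Example (editorial sketch, not part of the original change): attaching a
+ * page of payload to an skb.  skb_fill_page_desc() only records the
+ * fragment; the caller still accounts for it in the length fields.
+ *
+ *	skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
+ *	skb->len      += PAGE_SIZE;
+ *	skb->data_len += PAGE_SIZE;
+ *	skb->truesize += PAGE_SIZE;
+ */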
+
+#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags)
+#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list)
+#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb))
+
+/*
+ * Add data to an sk_buff
+ */
+static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp = skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail += len;
+ skb->len += len;
+ return tmp;
+}
+
+/**
+ * skb_put - add data to a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer. If this would
+ * exceed the total buffer size the kernel will panic. A pointer to the
+ * first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
+{
+ unsigned char *tmp = skb->tail;
+ SKB_LINEAR_ASSERT(skb);
+ skb->tail += len;
+ skb->len += len;
+ if (unlikely(skb->tail>skb->end))
+ skb_over_panic(skb, len, current_text_addr());
+ return tmp;
+}
+
+static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data -= len;
+ skb->len += len;
+ return skb->data;
+}
+
+/**
+ * skb_push - add data to the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to add
+ *
+ * This function extends the used data area of the buffer at the buffer
+ * start. If this would exceed the total buffer headroom the kernel will
+ * panic. A pointer to the first byte of the extra data is returned.
+ */
+static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb->data -= len;
+ skb->len += len;
+ if (unlikely(skb->data<skb->head))
+ skb_under_panic(skb, len, current_text_addr());
+ return skb->data;
+}
+
+static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ skb->len -= len;
+ BUG_ON(skb->len < skb->data_len);
+ return skb->data += len;
+}
+
+/**
+ * skb_pull - remove data from the start of a buffer
+ * @skb: buffer to use
+ * @len: amount of data to remove
+ *
+ * This function removes data from the start of a buffer, returning
+ * the memory to the headroom. A pointer to the next data in the buffer
+ * is returned. Once the data has been pulled future pushes will overwrite
+ * the old data.
+ */
+static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
+{
+ return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+}
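+/*
+ * Example (editorial sketch, not part of the original change): skb_pull()
+ * and skb_push() are symmetric, so a header can be stripped for inspection
+ * and restored afterwards.
+ *
+ *	struct ethhdr *eth = (struct ethhdr *)skb->data;
+ *	skb_pull(skb, ETH_HLEN);	data now points at the network header
+ *	...
+ *	skb_push(skb, ETH_HLEN);	data points back at the ethernet header
+ */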
+
+extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
+
+static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (len > skb_headlen(skb) &&
+ !__pskb_pull_tail(skb, len-skb_headlen(skb)))
+ return NULL;
+ skb->len -= len;
+ return skb->data += len;
+}
+
+static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
+{
+ return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
+}
+
+static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
+{
+ if (likely(len <= skb_headlen(skb)))
+ return 1;
+ if (unlikely(len > skb->len))
+ return 0;
+ return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL;
+}
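+/*
+ * Example (editorial sketch, not part of the original change): before
+ * dereferencing a header on a possibly non-linear skb, make sure it lies
+ * within the linear area.
+ *
+ *	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ *		goto drop;		truncated or unpullable packet
+ *	iph = (struct iphdr *)skb->data;
+ */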
+
+/**
+ * skb_headroom - bytes at buffer head
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the head of an &sk_buff.
+ */
+static inline int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data - skb->head;
+}
+
+/**
+ * skb_tailroom - bytes at buffer end
+ * @skb: buffer to check
+ *
+ * Return the number of bytes of free space at the tail of an &sk_buff.
+ */
+static inline int skb_tailroom(const struct sk_buff *skb)
+{
+ return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
+}
+
+/**
+ * skb_reserve - adjust headroom
+ * @skb: buffer to alter
+ * @len: bytes to move
+ *
+ * Increase the headroom of an empty &sk_buff by reducing the tail
+ * room. This is only allowed for an empty buffer.
+ */
+static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
+{
+ skb->data += len;
+ skb->tail += len;
+}
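+/*
+ * Example (editorial sketch, not part of the original change): the classic
+ * receive-buffer pattern reserves two bytes so that the IP header ends up
+ * 4-byte aligned after the 14-byte ethernet header.  "pkt_len" and "rx_buf"
+ * are hypothetical.
+ *
+ *	skb = dev_alloc_skb(pkt_len + 2);
+ *	if (skb == NULL)
+ *		goto drop;
+ *	skb_reserve(skb, 2);
+ *	memcpy(skb_put(skb, pkt_len), rx_buf, pkt_len);
+ */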
+
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data + len;
+ } else
+ ___pskb_trim(skb, len, 0);
+}
+
+/**
+ * skb_trim - remove end from a buffer
+ * @skb: buffer to alter
+ * @len: new length
+ *
+ * Cut the length of a buffer down by removing data from the tail. If
+ * the buffer is already under the length specified it is not modified.
+ */
+static inline void skb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->len > len)
+ __skb_trim(skb, len);
+}
+
+
+static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ if (!skb->data_len) {
+ skb->len = len;
+ skb->tail = skb->data+len;
+ return 0;
+ }
+ return ___pskb_trim(skb, len, 1);
+}
+
+static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
+{
+ return (len < skb->len) ? __pskb_trim(skb, len) : 0;
+}
+
+/**
+ * skb_orphan - orphan a buffer
+ * @skb: buffer to orphan
+ *
+ * If a buffer currently has an owner then we call the owner's
+ * destructor function and make the @skb unowned. The buffer continues
+ * to exist but is no longer charged to its former owner.
+ */
+static inline void skb_orphan(struct sk_buff *skb)
+{
+ if (skb->destructor)
+ skb->destructor(skb);
+ skb->destructor = NULL;
+ skb->sk = NULL;
+}
+
+/**
+ * __skb_queue_purge - empty a list
+ * @list: list to empty
+ *
+ * Delete all buffers on an &sk_buff list. Each buffer is removed from
+ * the list and one reference dropped. This function does not take the
+ * list lock and the caller must hold the relevant locks to use it.
+ */
+extern void skb_queue_purge(struct sk_buff_head *list);
+static inline void __skb_queue_purge(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+ while ((skb = __skb_dequeue(list)) != NULL)
+ kfree_skb(skb);
+}
+
+/**
+ * __dev_alloc_skb - allocate an skbuff for receiving
+ * @length: length to allocate
+ * @gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory.
+ */
+static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
+ int gfp_mask)
+{
+ struct sk_buff *skb;
+#ifdef CONFIG_PAGESIZED_SKBS
+ length = max(length, (unsigned int)(PAGE_SIZE - 16));
+#endif
+ skb = alloc_skb(length + 16, gfp_mask);
+ if (likely(skb))
+ skb_reserve(skb, 16);
+ return skb;
+}
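+/*
+ * Editorial note (not part of the original change): with CONFIG_PAGESIZED_SKBS
+ * and 4 KiB pages, a request such as __dev_alloc_skb(1500, GFP_ATOMIC) is
+ * rounded up to max(1500, 4096 - 16) = 4080, so alloc_skb() is asked for
+ * 4080 + 16 = 4096 bytes of linear data and every receive buffer spans at
+ * least a full page.
+ */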
+
+/**
+ * dev_alloc_skb - allocate an skbuff for receiving
+ * @length: length to allocate
+ *
+ * Allocate a new &sk_buff and assign it a usage count of one. The
+ * buffer has unspecified headroom built in. Users should allocate
+ * the headroom they think they need without accounting for the
+ * built in space. The built in space is used for optimisations.
+ *
+ * %NULL is returned if there is no free memory. Although this function
+ * allocates memory it can be called from an interrupt.
+ */
+static inline struct sk_buff *dev_alloc_skb(unsigned int length)
+{
+ return __dev_alloc_skb(length, GFP_ATOMIC);
+}
+
+/**
+ * skb_cow - copy header of skb when it is required
+ * @skb: buffer to cow
+ * @headroom: needed headroom
+ *
+ * If the skb passed lacks sufficient headroom or its data part
+ * is shared, data is reallocated. If reallocation fails, an error
+ * is returned and the original skb is not changed.
+ *
+ * The result is an skb with a writable area spanning skb->head...skb->tail
+ * and at least @headroom bytes of free space at the head.
+ */
+static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
+{
+ int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
+
+ if (delta < 0)
+ delta = 0;
+
+ if (delta || skb_cloned(skb))
+ return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
+ return 0;
+}
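+/*
+ * Example (editorial sketch, not part of the original change): grow the
+ * headroom (and unshare the data) before pushing an extra header.
+ *
+ *	if (skb_cow(skb, sizeof(struct iphdr)) < 0)
+ *		goto drop;
+ *	iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));
+ */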
+
+/**
+ * skb_padto - pad an skbuff up to a minimal size
+ * @skb: buffer to pad
+ * @len: minimal length
+ *
+ * Pads up a buffer to ensure the trailing bytes exist and are
+ * blanked. If the buffer already contains sufficient data it
+ * is untouched. Returns the buffer, which may be a replacement
+ * for the original, or NULL for out of memory - in which case
+ * the original buffer is still freed.
+ */
+
+static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
+{
+ unsigned int size = skb->len;
+ if (likely(size >= len))
+ return skb;
+ return skb_pad(skb, len-size);
+}
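+/*
+ * Example (editorial sketch, not part of the original change): pad runt
+ * ethernet frames up to the 60-byte minimum before handing them to hardware
+ * that does not pad on its own.
+ *
+ *	skb = skb_padto(skb, ETH_ZLEN);
+ *	if (skb == NULL)
+ *		return 0;		original skb already freed
+ */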
+
+/**
+ * skb_linearize - convert paged skb to linear one
+ * @skb: buffer to linearize
+ * @gfp: allocation mode
+ *
+ * If there is no free memory -ENOMEM is returned, otherwise zero
+ * is returned and the old skb data released.
+ */
+extern int __skb_linearize(struct sk_buff *skb, int gfp);
+static inline int skb_linearize(struct sk_buff *skb, int gfp)
+{
+ return __skb_linearize(skb, gfp);
+}
+
+static inline void *kmap_skb_frag(const skb_frag_t *frag)
+{
+#ifdef CONFIG_HIGHMEM
+ BUG_ON(in_irq());
+
+ local_bh_disable();
+#endif
+ return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+}
+
+static inline void kunmap_skb_frag(void *vaddr)
+{
+ kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
+#ifdef CONFIG_HIGHMEM
+ local_bh_enable();
+#endif
+}
+
+#define skb_queue_walk(queue, skb) \
+ for (skb = (queue)->next, prefetch(skb->next); \
+ (skb != (struct sk_buff *)(queue)); \
+ skb = skb->next, prefetch(skb->next))
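+/*
+ * Example (editorial sketch, not part of the original change): walking a
+ * queue without removing anything.  The caller must hold the queue lock (or
+ * otherwise own the list) for the duration of the walk.
+ *
+ *	struct sk_buff *skb;
+ *	unsigned long flags;
+ *
+ *	spin_lock_irqsave(&q->lock, flags);
+ *	skb_queue_walk(q, skb)
+ *		total += skb->len;
+ *	spin_unlock_irqrestore(&q->lock, flags);
+ */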
+
+
+extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+ int noblock, int *err);
+extern unsigned int datagram_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait);
+extern int skb_copy_datagram(const struct sk_buff *from,
+ int offset, char __user *to, int size);
+extern int skb_copy_datagram_iovec(const struct sk_buff *from,
+ int offset, struct iovec *to,
+ int size);
+extern int skb_copy_and_csum_datagram(const struct sk_buff *skb,
+ int offset, u8 __user *to,
+ int len, unsigned int *csump);
+extern int skb_copy_and_csum_datagram_iovec(const
+ struct sk_buff *skb,
+ int hlen,
+ struct iovec *iov);
+extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+extern unsigned int skb_checksum(const struct sk_buff *skb, int offset,
+ int len, unsigned int csum);
+extern int skb_copy_bits(const struct sk_buff *skb, int offset,
+ void *to, int len);
+extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb,
+ int offset, u8 *to, int len,
+ unsigned int csum);
+extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
+
+extern void skb_init(void);
+extern void skb_add_mtu(int mtu);
+
+#ifdef CONFIG_NETFILTER
+static inline void nf_conntrack_put(struct nf_ct_info *nfct)
+{
+ if (nfct && atomic_dec_and_test(&nfct->master->use))
+ nfct->master->destroy(nfct->master);
+}
+static inline void nf_conntrack_get(struct nf_ct_info *nfct)
+{
+ if (nfct)
+ atomic_inc(&nfct->master->use);
+}
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
+{
+ if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
+ kfree(nf_bridge);
+}
+static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
+{
+ if (nf_bridge)
+ atomic_inc(&nf_bridge->use);
+}
+#endif
+
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _LINUX_SKBUFF_H */
diff --git a/linux-2.6.7-xen-sparse/mm/page_alloc.c b/linux-2.6.7-xen-sparse/mm/page_alloc.c
index 5d9b765d39..6671262ae0 100644
--- a/linux-2.6.7-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6.7-xen-sparse/mm/page_alloc.c
@@ -497,9 +497,8 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
struct per_cpu_pages *pcp;
unsigned long flags;
- /* XXX Xen: use mapping pointer as skb/data-page destructor */
- if (page->mapping)
- return (*(void(*)(struct page *))page->mapping)(page);
+ if (PageForeign(page))
+ return (PageForeignDestructor(page))(page);
kernel_map_pages(page, 1, 0);
inc_page_state(pgfree);