Diffstat (limited to 'linux-2.4.28-xen-sparse/include'): 29 files changed, 7814 insertions(+), 0 deletions(-)
diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/bugs.h b/linux-2.4.28-xen-sparse/include/asm-xen/bugs.h new file mode 100644 index 0000000000..c46b6a0b15 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/bugs.h @@ -0,0 +1,53 @@ +/* + * include/asm-i386/bugs.h + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * <rreilova@ececs.uc.edu> + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/i387.h> +#include <asm/msr.h> + + +static void __init check_fpu(void) +{ + boot_cpu_data.fdiv_bug = 0; +} + +static void __init check_hlt(void) +{ + boot_cpu_data.hlt_works_ok = 1; +} + +static void __init check_bugs(void) +{ + extern void __init boot_init_fpu(void); + + identify_cpu(&boot_cpu_data); + boot_init_fpu(); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_fpu(); + check_hlt(); + system_utsname.machine[1] = '0' + + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); +} diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/desc.h b/linux-2.4.28-xen-sparse/include/asm-xen/desc.h new file mode 100644 index 0000000000..33309a9671 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/desc.h @@ -0,0 +1,41 @@ +#ifndef __ARCH_DESC_H +#define __ARCH_DESC_H + +#include <asm/ldt.h> + +#ifndef __ASSEMBLY__ + +struct desc_struct { + unsigned long a,b; +}; + +struct Xgt_desc_struct { + unsigned short size; + unsigned long address __attribute__((packed)); +}; + +extern struct desc_struct default_ldt[]; + +static inline void clear_LDT(void) +{ + /* + * NB. We load the default_ldt for lcall7/27 handling on demand, as + * it slows down context switching. Noone uses it anyway. + */ + queue_set_ldt(0, 0); +} + +static inline void load_LDT(mm_context_t *pc) +{ + void *segments = pc->ldt; + int count = pc->size; + + if ( count == 0 ) + segments = NULL; + + queue_set_ldt((unsigned long)segments, count); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ARCH_DESC_H__ */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/fixmap.h b/linux-2.4.28-xen-sparse/include/asm-xen/fixmap.h new file mode 100644 index 0000000000..bc6e2c2004 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/fixmap.h @@ -0,0 +1,105 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +#include <linux/config.h> +#include <linux/kernel.h> +#include <asm/apicdef.h> +#include <asm/page.h> +#ifdef CONFIG_HIGHMEM +#include <linux/threads.h> +#include <asm/kmap_types.h> +#endif + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. 
+ * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * these 'compile-time allocated' memory buffers are + * fixed-size 4k pages. (or larger if used with an increment + * highger than 1) use fixmap_set(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ + +enum fixed_addresses { +#ifdef CONFIG_HIGHMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#endif + FIX_BLKRING_BASE, + FIX_NETRING0_BASE, + FIX_NETRING1_BASE, + FIX_NETRING2_BASE, + FIX_NETRING3_BASE, + FIX_SHARED_INFO, + FIX_GNTTAB, +#ifdef CONFIG_VGA_CONSOLE +#define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */ +#else +#define NR_FIX_BTMAPS 1 /* in case anyone wants it in future... */ +#endif + FIX_BTMAP_END, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, + /* our bt_ioremap is permanent, unlike other architectures */ + + __end_of_permanent_fixed_addresses, + __end_of_fixed_addresses = __end_of_permanent_fixed_addresses +}; + +extern void __set_fixmap (enum fixed_addresses idx, + unsigned long phys, pgprot_t flags); + +#define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) +/* + * Some hardware wants to get fixmapped without caching. + */ +#define set_fixmap_nocache(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) + +extern void clear_fixmap(enum fixed_addresses idx); + +/* + * used by vmalloc.c. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap, and leave one page empty + * at the top of mem.. + */ +#define FIXADDR_TOP (HYPERVISOR_VIRT_START - 2*PAGE_SIZE) +#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) + +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + +/* + * 'index to address' translation. If anyone tries to use the idx + * directly without tranlation, we catch the bug with a NULL-deference + * kernel oops. Illegal ranges of incoming indices are caught too. + */ +static inline unsigned long fix_to_virt(unsigned int idx) +{ + return __fix_to_virt(idx); +} + +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/highmem.h b/linux-2.4.28-xen-sparse/include/asm-xen/highmem.h new file mode 100644 index 0000000000..25ef32882c --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/highmem.h @@ -0,0 +1,132 @@ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. 
+ * + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#ifdef __KERNEL__ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <asm/kmap_types.h> +#include <asm/pgtable.h> + +#ifdef CONFIG_DEBUG_HIGHMEM +#define HIGHMEM_DEBUG 1 +#else +#define HIGHMEM_DEBUG 0 +#endif + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + +extern void kmap_init(void) __init; + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. + */ +#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23)) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif +#define LAST_PKMAP_MASK (LAST_PKMAP-1) +#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) + +extern void * FASTCALL(kmap_high(struct page *page, int nonblocking)); +extern void FASTCALL(kunmap_high(struct page *page)); + +#define kmap(page) __kmap(page, 0) +#define kmap_nonblock(page) __kmap(page, 1) + +static inline void *__kmap(struct page *page, int nonblocking) +{ + if (in_interrupt()) + out_of_line_bug(); + if (page < highmem_start_page) + return page_address(page); + return kmap_high(page, nonblocking); +} + +static inline void kunmap(struct page *page) +{ + if (in_interrupt()) + out_of_line_bug(); + if (page < highmem_start_page) + return; + kunmap_high(page); +} + +/* + * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap + * gives a more generic (and caching) interface. But kmap_atomic can + * be used in IRQ contexts, so in some (very limited) cases we need + * it. 
+ */ +static inline void *kmap_atomic(struct page *page, enum km_type type) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + if (page < highmem_start_page) + return page_address(page); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +#if HIGHMEM_DEBUG + if (!pte_none(*(kmap_pte-idx))) + out_of_line_bug(); +#endif + set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + __flush_tlb_one(vaddr); + + return (void*) vaddr; +} + +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ +#if HIGHMEM_DEBUG + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + if (vaddr < FIXADDR_START) // FIXME + return; + + if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) + out_of_line_bug(); + + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(kmap_pte-idx); + __flush_tlb_one(vaddr); +#endif +} + +#endif /* __KERNEL__ */ + +#endif /* _ASM_HIGHMEM_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/hw_irq.h b/linux-2.4.28-xen-sparse/include/asm-xen/hw_irq.h new file mode 100644 index 0000000000..d99d15bd24 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/hw_irq.h @@ -0,0 +1,61 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * linux/include/asm/hw_irq.h + * + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + */ + +#include <linux/config.h> +#include <linux/smp.h> +#include <asm/atomic.h> +#include <asm/irq.h> + +#define SYSCALL_VECTOR 0x80 + +extern int irq_vector[NR_IRQS]; + +extern atomic_t irq_err_count; +extern atomic_t irq_mis_count; + +extern char _stext, _etext; + +extern unsigned long prof_cpu_mask; +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; + +/* + * x86 profiling function, SMP safe. We might want to do this in + * assembly totally? + */ +static inline void x86_do_profile (unsigned long eip) +{ + if (!prof_buffer) + return; + + /* + * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. + * (default is all CPUs.) + */ + if (!((1<<smp_processor_id()) & prof_cpu_mask)) + return; + + eip -= (unsigned long) &_stext; + eip >>= prof_shift; + /* + * Don't ignore out-of-bounds EIP values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. + */ + if (eip > prof_len-1) + eip = prof_len-1; + atomic_inc((atomic_t *)&prof_buffer[eip]); +} + +static inline void hw_resend_irq(struct hw_interrupt_type *h, + unsigned int i) +{} + +#endif /* _ASM_HW_IRQ_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/io.h b/linux-2.4.28-xen-sparse/include/asm-xen/io.h new file mode 100644 index 0000000000..3ea2752635 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/io.h @@ -0,0 +1,457 @@ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#include <linux/config.h> + +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. 
+ */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. + * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> + */ + +#define IO_SPACE_LIMIT 0xffff + +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ +#define XQUAD_PORTIO_LEN 0x80000 /* Only remapping first 2 quads */ + +#ifdef __KERNEL__ + +#include <linux/vmalloc.h> + +/* + * Temporary debugging check to catch old code using + * unmapped ISA addresses. Will be removed in 2.4. + */ +#if CONFIG_DEBUG_IOVIRT + extern void *__io_virt_debug(unsigned long x, const char *file, int line); + extern unsigned long __io_phys_debug(unsigned long x, const char *file, int line); + #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__) +//#define __io_phys(x) __io_phys_debug((unsigned long)(x), __FILE__, __LINE__) +#else + #define __io_virt(x) ((void *)(x)) +//#define __io_phys(x) __pa(x) +#endif + +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline unsigned long virt_to_phys(volatile void * address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void * phys_to_virt(unsigned long address) +{ + return __va(address); +} + +/* + * We define page_to_phys 'incorrectly' because it is used when merging blkdev + * requests, and the correct thing to do there is to use machine addresses. + */ +#define page_to_phys(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT) + +extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); + +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. 
+ */ + +static inline void * ioremap (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, 0); +} + +/** + * ioremap_nocache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncachable + * on the CPU as well as honouring existing caching rules from things like + * the PCI bus. Note that there are other caches and buffers on many + * busses. In paticular driver authors should read up on PCI writes + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + */ + +static inline void * ioremap_nocache (unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, _PAGE_PCD); +} + +extern void iounmap(void *addr); + +/* + * bt_ioremap() and bt_iounmap() are for temporary early boot-time + * mappings, before the real ioremap() is functional. + * A boot-time mapping is currently limited to at most 16 pages. + */ +extern void *bt_ioremap(unsigned long offset, unsigned long size); +extern void bt_iounmap(void *addr, unsigned long size); + +#define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) +#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) +#define page_to_bus(_x) phys_to_machine(((_x) - mem_map) << PAGE_SHIFT) +#define bus_to_phys(_x) machine_to_phys(_x) +#define bus_to_page(_x) (mem_map + (bus_to_phys(_x) >> PAGE_SHIFT)) + +/* + * readX/writeX() are used to access memory mapped devices. On some + * architectures the memory mapped IO stuff needs to be accessed + * differently. On the x86 architecture, we just read/write the + * memory location directly. + */ + +#define readb(addr) (*(volatile unsigned char *) __io_virt(addr)) +#define readw(addr) (*(volatile unsigned short *) __io_virt(addr)) +#define readl(addr) (*(volatile unsigned int *) __io_virt(addr)) +#define __raw_readb readb +#define __raw_readw readw +#define __raw_readl readl + +#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b)) +#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b)) +#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b)) +#define __raw_writeb writeb +#define __raw_writew writew +#define __raw_writel writel + +#define memset_io(a,b,c) __memset(__io_virt(a),(b),(c)) +#define memcpy_fromio(a,b,c) __memcpy((a),__io_virt(b),(c)) +#define memcpy_toio(a,b,c) __memcpy(__io_virt(a),(b),(c)) + +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. 
The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char *)(PAGE_OFFSET)) + +#define isa_readb(a) readb(__ISA_IO_base + (a)) +#define isa_readw(a) readw(__ISA_IO_base + (a)) +#define isa_readl(a) readl(__ISA_IO_base + (a)) +#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) +#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) +#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) +#define isa_memset_io(a,b,c) memset_io(__ISA_IO_base + (a),(b),(c)) +#define isa_memcpy_fromio(a,b,c) memcpy_fromio((a),__ISA_IO_base + (b),(c)) +#define isa_memcpy_toio(a,b,c) memcpy_toio(__ISA_IO_base + (a),(b),(c)) + + +/* + * Again, i386 does not require mem IO specific function. + */ + +#define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(b),(c),(d)) +#define isa_eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d)) + +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/** + * isa_check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the ISA mmio address io_addr. + * Returns 1 on a match. + * + * This function is deprecated. New drivers should use ioremap and + * check_signature. + */ + + +static inline int isa_check_signature(unsigned long io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (isa_readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. 
Accidentally out of order processors (PPro errata #51) + */ + +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + +static inline void flush_write_buffers(void) +{ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); +} + +#define dma_cache_inv(_start,_size) flush_write_buffers() +#define dma_cache_wback(_start,_size) flush_write_buffers() +#define dma_cache_wback_inv(_start,_size) flush_write_buffers() + +#else + +/* Nothing to do */ + +#define dma_cache_inv(_start,_size) do { } while (0) +#define dma_cache_wback(_start,_size) do { } while (0) +#define dma_cache_wback_inv(_start,_size) do { } while (0) +#define flush_write_buffers() + +#endif + +#endif /* __KERNEL__ */ + +#ifdef SLOW_IO_BY_JUMPING +#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:" +#elif defined(__UNSAFE_IO__) +#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +#else +#define __SLOW_DOWN_IO "\n1: outb %%al,$0x80\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,2b\n" \ + ".previous" +#endif + +#ifdef REALLY_SLOW_IO +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#else +#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO +#endif + +#ifdef CONFIG_MULTIQUAD +extern void *xquad_portio; /* Where the IO area was mapped */ +#endif /* CONFIG_MULTIQUAD */ + +/* + * Talk about misusing macros.. + */ +#define __OUT1(s,x) \ +static inline void out##s(unsigned x value, unsigned short port) { + +#ifdef __UNSAFE_IO__ +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" +#else +#define __OUT2(s,s1,s2) \ +__asm__ __volatile__ ("1: out" #s " %" s1 "0,%" s2 "1\n" \ + "2:\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,2b\n" \ + ".previous" +#endif + +#if defined (CONFIG_MULTIQUAD) && !defined(STANDALONE) +#define __OUTQ(s,ss,x) /* Do the equivalent of the portio op on quads */ \ +static inline void out##ss(unsigned x value, unsigned short port) { \ + if (xquad_portio) \ + write##s(value, (unsigned long) xquad_portio + port); \ + else /* We're still in early boot, running on quad 0 */ \ + out##ss##_local(value, port); \ +} \ +static inline void out##ss##_quad(unsigned x value, unsigned short port, int quad) { \ + if (xquad_portio) \ + write##s(value, (unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ + + port); \ +} + +#define __INQ(s,ss) /* Do the equivalent of the portio op on quads */ \ +static inline RETURN_TYPE in##ss(unsigned short port) { \ + if (xquad_portio) \ + return read##s((unsigned long) xquad_portio + port); \ + else /* We're still in early boot, running on quad 0 */ \ + return in##ss##_local(port); \ +} \ +static inline RETURN_TYPE in##ss##_quad(unsigned short port, int quad) { \ + if (xquad_portio) \ + return read##s((unsigned long) xquad_portio + (XQUAD_PORTIO_QUAD*quad)\ + + port); \ + else\ + return 0;\ +} +#endif /* CONFIG_MULTIQUAD && !STANDALONE */ + +#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) +#define __OUT(s,s1,x) \ +__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} +#else +/* Make the default portio routines operate on quad 0 */ +#define __OUT(s,s1,x) \ +__OUT1(s##_local,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ +__OUT1(s##_p_local,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUTQ(s,s,x) \ +__OUTQ(s,s##_p,x) +#endif /* !CONFIG_MULTIQUAD || STANDALONE */ + +#define __IN1(s) \ +static inline RETURN_TYPE in##s(unsigned short 
port) { RETURN_TYPE _v; + +#ifdef __UNSAFE_IO__ +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" +#else +#define __IN2(s,s1,s2) \ +__asm__ __volatile__ ("1: in" #s " %" s2 "1,%" s1 "0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: mov" #s " $~0,%" s1 "0\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,3b\n" \ + ".previous" +#endif + +#if !defined(CONFIG_MULTIQUAD) || defined(STANDALONE) +#define __IN(s,s1,i...) \ +__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } +#else +/* Make the default portio routines operate on quad 0 */ +#define __IN(s,s1,i...) \ +__IN1(s##_local) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s##_p_local) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__INQ(s,s) \ +__INQ(s,s##_p) +#endif /* !CONFIG_MULTIQUAD || STANDALONE */ + +#define __INS(s) \ +static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; ins" #s \ +: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define __OUTS(s) \ +static inline void outs##s(unsigned short port, const void * addr, unsigned long count) \ +{ __asm__ __volatile__ ("rep ; outs" #s \ +: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); } + +#define RETURN_TYPE unsigned char +__IN(b,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned short +__IN(w,"") +#undef RETURN_TYPE +#define RETURN_TYPE unsigned int +__IN(l,"") +#undef RETURN_TYPE + +__OUT(b,"b",char) +__OUT(w,"w",short) +__OUT(l,,int) + +__INS(b) +__INS(w) +__INS(l) + +__OUTS(b) +__OUTS(w) +__OUTS(l) + +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/irq.h b/linux-2.4.28-xen-sparse/include/asm-xen/irq.h new file mode 100644 index 0000000000..836629fe92 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/irq.h @@ -0,0 +1,65 @@ +#ifndef _ASM_IRQ_H +#define _ASM_IRQ_H + +/* + * linux/include/asm/irq.h + * + * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar + * + * IRQ/IPI changes taken from work by Thomas Radke + * <tomsoft@informatik.tu-chemnitz.de> + */ + +#include <linux/config.h> +#include <asm/hypervisor.h> +#include <asm/ptrace.h> + +/* + * The flat IRQ space is divided into two regions: + * 1. A one-to-one mapping of real physical IRQs. This space is only used + * if we have physical device-access privilege. This region is at the + * start of the IRQ space so that existing device drivers do not need + * to be modified to translate physical IRQ numbers into our IRQ space. + * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These + * are bound using the provided bind/unbind functions. + */ + +#define PIRQ_BASE 0 +#define NR_PIRQS 128 + +#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) +#define NR_DYNIRQS 128 + +#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) + +#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) +#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) + +#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) +#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) + +/* Dynamic binding of event channels and VIRQ sources to Linux IRQ space. */ +extern int bind_virq_to_irq(int virq); +extern void unbind_virq_from_irq(int virq); +extern int bind_evtchn_to_irq(int evtchn); +extern void unbind_evtchn_from_irq(int evtchn); + +static __inline__ int irq_cannonicalize(int irq) +{ + return (irq == 2) ? 
9 : irq; +} + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +extern void irq_suspend(void); +extern void irq_resume(void); + + +#define CPU_MASK_NONE 0 + +/* XXX SMH: no-op for compat w/ 2.6 shared files */ +#define irq_ctx_init(cpu) do { ; } while (0) + +#endif /* _ASM_IRQ_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/keyboard.h b/linux-2.4.28-xen-sparse/include/asm-xen/keyboard.h new file mode 100644 index 0000000000..81c49a6908 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/keyboard.h @@ -0,0 +1,74 @@ +/* + * linux/include/asm-i386/keyboard.h + * + * Created 3 Nov 1996 by Geert Uytterhoeven + */ + +/* + * This file contains the i386 architecture specific keyboard definitions + */ + +#ifndef _I386_KEYBOARD_H +#define _I386_KEYBOARD_H + +#ifdef __KERNEL__ + +#include <linux/kernel.h> +#include <linux/ioport.h> +#include <linux/kd.h> +#include <linux/pm.h> +#include <asm/io.h> + +#define KEYBOARD_IRQ 1 +#define DISABLE_KBD_DURING_INTERRUPTS 0 + +extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode); +extern int pckbd_getkeycode(unsigned int scancode); +extern int pckbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode); +extern char pckbd_unexpected_up(unsigned char keycode); +extern void pckbd_leds(unsigned char leds); +extern void pckbd_init_hw(void); +extern int pckbd_pm_resume(struct pm_dev *, pm_request_t, void *); +extern pm_callback pm_kbd_request_override; +extern unsigned char pckbd_sysrq_xlate[128]; + +#define kbd_setkeycode pckbd_setkeycode +#define kbd_getkeycode pckbd_getkeycode +#define kbd_translate pckbd_translate +#define kbd_unexpected_up pckbd_unexpected_up +#define kbd_leds pckbd_leds +#define kbd_init_hw pckbd_init_hw +#define kbd_sysrq_xlate pckbd_sysrq_xlate + +#define SYSRQ_KEY 0x54 + +#define kbd_controller_present() (xen_start_info.flags & SIF_INITDOMAIN) + +/* resource allocation */ +#define kbd_request_region() +#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \ + "keyboard", NULL) + +/* How to access the keyboard macros on this platform. */ +#define kbd_read_input() inb(KBD_DATA_REG) +#define kbd_read_status() inb(KBD_STATUS_REG) +#define kbd_write_output(val) outb(val, KBD_DATA_REG) +#define kbd_write_command(val) outb(val, KBD_CNTL_REG) + +/* Some stoneage hardware needs delays after some operations. */ +#define kbd_pause() do { } while(0) + +/* + * Machine specific bits for the PS/2 driver + */ + +#define AUX_IRQ 12 + +#define aux_request_irq(hand, dev_id) \ + request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id) + +#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id) + +#endif /* __KERNEL__ */ +#endif /* _I386_KEYBOARD_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/mmu_context.h b/linux-2.4.28-xen-sparse/include/asm-xen/mmu_context.h new file mode 100644 index 0000000000..7972ce7d74 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/mmu_context.h @@ -0,0 +1,74 @@ +#ifndef __I386_MMU_CONTEXT_H +#define __I386_MMU_CONTEXT_H + +#include <linux/config.h> +#include <asm/desc.h> +#include <asm/atomic.h> +#include <asm/pgalloc.h> + +/* + * hooks to add arch specific data into the mm struct. + * Note that destroy_context is called even if init_new_context + * fails. 
+ */ +int init_new_context(struct task_struct *tsk, struct mm_struct *mm); +void destroy_context(struct mm_struct *mm); + +#ifdef CONFIG_SMP + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ + if(cpu_tlbstate[cpu].state == TLBSTATE_OK) + cpu_tlbstate[cpu].state = TLBSTATE_LAZY; +} +#else +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) +{ +} +#endif + +extern pgd_t *cur_pgd; + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) +{ + if (prev != next) { + /* stop flush ipis for the previous mm */ + clear_bit(cpu, &prev->cpu_vm_mask); +#ifdef CONFIG_SMP + cpu_tlbstate[cpu].state = TLBSTATE_OK; + cpu_tlbstate[cpu].active_mm = next; +#endif + + /* Re-load page tables */ + cur_pgd = next->pgd; + queue_pt_switch(__pa(cur_pgd)); + /* load_LDT, if either the previous or next thread + * has a non-default LDT. + */ + if (next->context.size+prev->context.size) + load_LDT(&next->context); + } +#ifdef CONFIG_SMP + else { + cpu_tlbstate[cpu].state = TLBSTATE_OK; + if(cpu_tlbstate[cpu].active_mm != next) + out_of_line_bug(); + if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload %cr3. + */ + cur_pgd = next->pgd; + queue_pt_switch(__pa(cur_pgd)); + load_LDT(next); + } + } +#endif +} + +#define activate_mm(prev, next) \ +do { \ + switch_mm((prev),(next),NULL,smp_processor_id()); \ + flush_page_update_queue(); \ +} while ( 0 ) + +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/module.h b/linux-2.4.28-xen-sparse/include/asm-xen/module.h new file mode 100644 index 0000000000..17cd5ff07c --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/module.h @@ -0,0 +1,14 @@ +#ifndef _ASM_I386_MODULE_H +#define _ASM_I386_MODULE_H +/* + * This file contains the i386 architecture specific module code. 
+ */ + +extern int xen_module_init(struct module *mod); + +#define module_map(x) vmalloc(x) +#define module_unmap(x) vfree(x) +#define module_arch_init(x) xen_module_init(x) +#define arch_init_modules(x) do { } while (0) + +#endif /* _ASM_I386_MODULE_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/msr.h b/linux-2.4.28-xen-sparse/include/asm-xen/msr.h new file mode 100644 index 0000000000..1a2c8765a8 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/msr.h @@ -0,0 +1,138 @@ +#ifndef __ASM_MSR_H +#define __ASM_MSR_H + +/* + * Access to machine-specific registers (available on 586 and better only) + * Note: the rd* operations modify the parameters directly (without using + * pointer indirection), this allows gcc to optimize better + */ + +#define rdmsr(msr,val1,val2) \ +{ \ + dom0_op_t op; \ + op.cmd = DOM0_MSR; \ + op.u.msr.write = 0; \ + op.u.msr.msr = msr; \ + op.u.msr.cpu_mask = (1 << current->processor); \ + HYPERVISOR_dom0_op(&op); \ + val1 = op.u.msr.out1; \ + val2 = op.u.msr.out2; \ +} + +#define wrmsr(msr,val1,val2) \ +{ \ + dom0_op_t op; \ + op.cmd = DOM0_MSR; \ + op.u.msr.write = 1; \ + op.u.msr.cpu_mask = (1 << current->processor); \ + op.u.msr.msr = msr; \ + op.u.msr.in1 = val1; \ + op.u.msr.in2 = val2; \ + HYPERVISOR_dom0_op(&op); \ +} + +#define rdtsc(low,high) \ + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) + +#define rdtscl(low) \ + __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ + : "c" (counter)) + +/* symbolic names for some interesting MSRs */ +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0 +#define MSR_IA32_P5_MC_TYPE 1 +#define MSR_IA32_PLATFORM_ID 0x17 +#define MSR_IA32_EBL_CR_POWERON 0x2a + +#define MSR_IA32_APICBASE 0x1b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_UCODE_WRITE 0x79 +#define MSR_IA32_UCODE_REV 0x8b + +#define MSR_IA32_BBL_CR_CTL 0x119 + +#define MSR_IA32_MCG_CAP 0x179 +#define MSR_IA32_MCG_STATUS 0x17a +#define MSR_IA32_MCG_CTL 0x17b + +#define MSR_IA32_THERM_CONTROL 0x19a +#define MSR_IA32_THERM_INTERRUPT 0x19b +#define MSR_IA32_THERM_STATUS 0x19c +#define MSR_IA32_MISC_ENABLE 0x1a0 + +#define MSR_IA32_DEBUGCTLMSR 0x1d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x1db +#define MSR_IA32_LASTBRANCHTOIP 0x1dc +#define MSR_IA32_LASTINTFROMIP 0x1dd +#define MSR_IA32_LASTINTTOIP 0x1de + +#define MSR_IA32_MC0_CTL 0x400 +#define MSR_IA32_MC0_STATUS 0x401 +#define MSR_IA32_MC0_ADDR 0x402 +#define MSR_IA32_MC0_MISC 0x403 + +#define MSR_P6_PERFCTR0 0xc1 +#define MSR_P6_PERFCTR1 0xc2 +#define MSR_P6_EVNTSEL0 0x186 +#define MSR_P6_EVNTSEL1 0x187 + +#define MSR_IA32_PERF_STATUS 0x198 +#define MSR_IA32_PERF_CTL 0x199 + +/* AMD Defined MSRs */ +#define MSR_K6_EFER 0xC0000080 +#define MSR_K6_STAR 0xC0000081 +#define MSR_K6_WHCR 0xC0000082 +#define MSR_K6_UWCCR 0xC0000085 +#define MSR_K6_EPMR 0xC0000086 +#define MSR_K6_PSOR 0xC0000087 +#define MSR_K6_PFIR 0xC0000088 + +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 +#define MSR_K7_HWCR 0xC0010015 +#define MSR_K7_CLK_CTL 0xC001001b +#define MSR_K7_FID_VID_CTL 0xC0010041 +#define MSR_K7_VID_STATUS 0xC0010042 + +/* Centaur-Hauls/IDT defined MSRs. 
*/ +#define MSR_IDT_FCR1 0x107 +#define MSR_IDT_FCR2 0x108 +#define MSR_IDT_FCR3 0x109 +#define MSR_IDT_FCR4 0x10a + +#define MSR_IDT_MCR0 0x110 +#define MSR_IDT_MCR1 0x111 +#define MSR_IDT_MCR2 0x112 +#define MSR_IDT_MCR3 0x113 +#define MSR_IDT_MCR4 0x114 +#define MSR_IDT_MCR5 0x115 +#define MSR_IDT_MCR6 0x116 +#define MSR_IDT_MCR7 0x117 +#define MSR_IDT_MCR_CTRL 0x120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x1107 +#define MSR_VIA_LONGHAUL 0x110a +#define MSR_VIA_BCR2 0x1147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +#endif /* __ASM_MSR_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/page.h b/linux-2.4.28-xen-sparse/include/asm-xen/page.h new file mode 100644 index 0000000000..546cb65b22 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/page.h @@ -0,0 +1,181 @@ +#ifndef _I386_PAGE_H +#define _I386_PAGE_H + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include <linux/config.h> +#include <linux/string.h> +#include <asm/types.h> +#include <asm-xen/xen-public/xen.h> + +#ifdef CONFIG_XEN_SCRUB_PAGES +#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) +#else +#define scrub_pages(_p,_n) ((void)0) +#endif + +#ifdef CONFIG_X86_USE_3DNOW + +#include <asm/mmx.h> + +#define clear_page(page) mmx_clear_page((void *)(page)) +#define copy_page(to,from) mmx_copy_page(to,from) + +#else + +/* + * On older X86 processors its not a win to use MMX here it seems. + * Maybe the K6-III ? + */ + +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) memcpy((void *)(to), (void *)(from), PAGE_SIZE) + +#endif + +#define clear_user_page(page, vaddr) clear_page(page) +#define copy_user_page(to, from, vaddr) copy_page(to, from) + +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +extern unsigned long *phys_to_machine_mapping; +#define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)]) +#define mfn_to_pfn(_mfn) (machine_to_phys_mapping[(_mfn)]) +static inline unsigned long phys_to_machine(unsigned long phys) +{ + unsigned long machine = pfn_to_mfn(phys >> PAGE_SHIFT); + machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK); + return machine; +} +static inline unsigned long machine_to_phys(unsigned long machine) +{ + unsigned long phys = mfn_to_pfn(machine >> PAGE_SHIFT); + phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK); + return phys; +} + +/* + * These are used to make use of C type-checking.. 
+ */ +#if CONFIG_X86_PAE +typedef struct { unsigned long pte_low, pte_high; } pte_t; +typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long long pgd; } pgd_t; +#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#else +typedef struct { unsigned long pte_low; } pte_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pgd; } pgd_t; +static inline unsigned long pte_val(pte_t x) +{ + unsigned long ret = x.pte_low; + if ( (ret & 1) ) ret = machine_to_phys(ret); + return ret; +} +#endif +#define PTE_MASK PAGE_MASK + +typedef struct { unsigned long pgprot; } pgprot_t; + +static inline unsigned long pmd_val(pmd_t x) +{ + unsigned long ret = x.pmd; + if ( (ret & 1) ) ret = machine_to_phys(ret); + return ret; +} +#define pgd_val(x) ({ BUG(); (unsigned long)0; }) +#define pgprot_val(x) ((x).pgprot) + +static inline pte_t __pte(unsigned long x) +{ + if ( (x & 1) ) x = phys_to_machine(x); + return ((pte_t) { (x) }); +} +static inline pmd_t __pmd(unsigned long x) +{ + if ( (x & 1) ) x = phys_to_machine(x); + return ((pmd_t) { (x) }); +} +#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#endif /* !__ASSEMBLY__ */ + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + +/* + * This handles the memory map.. We could make this a config + * option, but too many people screw it up, and too few need + * it. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ + +#define __PAGE_OFFSET (0xC0000000) + +#ifndef __ASSEMBLY__ + +/* + * Tell the user there is some problem. Beep too, so we can + * see^H^H^Hhear bugs in early bootup as well! + * The offending file and line are encoded after the "officially + * undefined" opcode for parsing in the trap handler. 
+ */ + +#if 1 /* Set to zero for a slightly smaller kernel */ +#define BUG() \ + __asm__ __volatile__( "ud2\n" \ + "\t.word %c0\n" \ + "\t.long %c1\n" \ + : : "i" (__LINE__), "i" (__FILE__)) +#else +#define BUG() __asm__ __volatile__("ud2\n") +#endif + +#define PAGE_BUG(page) do { \ + BUG(); \ +} while (0) + +/* Pure 2^n version of get_order */ +static __inline__ int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +#endif /* __ASSEMBLY__ */ + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) +#define virt_to_page(kaddr) (mem_map + (__pa(kaddr) >> PAGE_SHIFT)) +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(_a) (phys_to_machine(__pa(_a))) +#define machine_to_virt(_m) (__va(machine_to_phys(_m))) + +#endif /* __KERNEL__ */ + +#endif /* _I386_PAGE_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/pci.h b/linux-2.4.28-xen-sparse/include/asm-xen/pci.h new file mode 100644 index 0000000000..74ae5ba8b1 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/pci.h @@ -0,0 +1,283 @@ +#ifndef __i386_PCI_H +#define __i386_PCI_H + +#include <linux/config.h> + +#ifdef __KERNEL__ + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +#ifdef CONFIG_PCI +extern unsigned int pcibios_assign_all_busses(void); +#else +#define pcibios_assign_all_busses() 0 +#endif +#define pcibios_scan_all_fns() 0 + +extern unsigned long pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +void pcibios_config_init(void); +struct pci_bus * pcibios_scan_root(int bus); +extern int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value); +extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); + +void pcibios_set_master(struct pci_dev *dev); +void pcibios_penalize_isa_irq(int irq); +struct irq_routing_table *pcibios_get_irq_routing_table(void); +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); + +/* Dynamic DMA mapping stuff. + * i386 has everything mapped statically. + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <asm/scatterlist.h> +#include <linux/string.h> +#include <asm/io.h> + +struct pci_dev; + +/* The networking and block device layers use this boolean for bounce + * buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (0) + +/* Allocate and map kernel buffer using consistent mode DMA for a device. + * hwdev should be valid struct pci_dev pointer for PCI devices, + * NULL for PCI-like buses (ISA, EISA). + * Returns non-NULL cpu-view pointer to the buffer if successful and + * sets *dma_addrp to the pci side dma address as well, else *dma_addrp + * is undefined. + */ +extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma_addr_t *dma_handle); + +/* Free and unmap a consistent DMA buffer. + * cpu_addr is what was returned from pci_alloc_consistent, + * size must be the same as what as passed into pci_alloc_consistent, + * and likewise dma_addr must be the same as what *dma_addrp was set to. 
+ * + * References to the memory and mappings associated with cpu_addr/dma_addr + * past this call are illegal. + */ +extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +/* Map a single buffer of the indicated size for DMA in streaming mode. + * The 32-bit bus address to use is returned. + * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ +static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + flush_write_buffers(); + return virt_to_bus(ptr); +} + +/* Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guarenteed to see + * whatever the device wrote there. + */ +static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + /* Nothing to do */ +} + +/* + * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. identical + * to pci_map_single, but takes a struct page instead of a virtual address + */ +static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + + return page_to_bus(page) + offset; +} + +static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + /* Nothing to do */ +} + +/* pci_unmap_{page,single} is a nop so... */ +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) +#define pci_unmap_addr(PTR, ADDR_NAME) (0) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) +#define pci_unmap_len(PTR, LEN_NAME) (0) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) + +/* Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scather-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + + /* + * temporary 2.4 hack + */ + for (i = 0; i < nents; i++ ) { + if (sg[i].address && sg[i].page) + out_of_line_bug(); + else if (!sg[i].address && !sg[i].page) + out_of_line_bug(); + + if (sg[i].address) + sg[i].dma_address = virt_to_bus(sg[i].address); + else + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + } + + flush_write_buffers(); + return nents; +} + +/* Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. 
+ */ +static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + /* Nothing to do */ +} + +/* Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +static inline void pci_dma_sync_single(struct pci_dev *hwdev, + dma_addr_t dma_handle, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + flush_write_buffers(); +} + +/* Make physical memory consistent for a set of streaming + * mode DMA translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +static inline void pci_dma_sync_sg(struct pci_dev *hwdev, + struct scatterlist *sg, + int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + out_of_line_bug(); + flush_write_buffers(); +} + +/* Return whether the given PCI device DMA address mask can + * be supported properly. For example, if your device can + * only drive the low 24-bits during PCI bus mastering, then + * you would pass 0x00ffffff as the mask to this function. + */ +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + */ + if(mask < 0x00ffffff) + return 0; + + return 1; +} + +/* This is always fine. */ +#define pci_dac_dma_supported(pci_dev, mask) (1) + +static __inline__ dma64_addr_t +pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction) +{ + return ((dma64_addr_t) page_to_bus(page) + + (dma64_addr_t) offset); +} + +static __inline__ struct page * +pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + return bus_to_page(dma_addr); +} + +static __inline__ unsigned long +pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr) +{ + return (dma_addr & ~PAGE_MASK); +} + +static __inline__ void +pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction) +{ + flush_write_buffers(); +} + +/* These macros should be used after a pci_map_sg call has been done + * to get bus addresses of each of the SG entries and their lengths. + * You should only work with the number of sg entries pci_map_sg + * returns. + */ +#define sg_dma_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->length) + +/* Return the index of the PCI controller for device. 
*/ +static inline int pci_controller_num(struct pci_dev *dev) +{ + return 0; +} + +#define HAVE_PCI_MMAP +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + +#endif /* __KERNEL__ */ + +#endif /* __i386_PCI_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/pgalloc.h b/linux-2.4.28-xen-sparse/include/asm-xen/pgalloc.h new file mode 100644 index 0000000000..f6bee4d689 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/pgalloc.h @@ -0,0 +1,285 @@ +#ifndef _I386_PGALLOC_H +#define _I386_PGALLOC_H + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/fixmap.h> +#include <asm/hypervisor.h> +#include <linux/threads.h> + +/* + * Quick lists are aligned so that least significant bits of array pointer + * are all zero when list is empty, and all one when list is full. + */ +#define QUICKLIST_ENTRIES 256 +#define QUICKLIST_EMPTY(_l) !((unsigned long)(_l) & ((QUICKLIST_ENTRIES*4)-1)) +#define QUICKLIST_FULL(_l) QUICKLIST_EMPTY((_l)+1) +#define pgd_quicklist (current_cpu_data.pgd_quick) +#define pmd_quicklist (current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + +#define pmd_populate(mm, pmd, pte) \ + do { \ + set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ + XEN_flush_page_update_queue(); \ + } while ( 0 ) + +/* + * Allocate and free page tables. + */ + +#if defined (CONFIG_X86_PAE) + +#error "no PAE support as yet" + +/* + * We can't include <linux/slab.h> here, thus these uglinesses. + */ +struct kmem_cache_s; + +extern struct kmem_cache_s *pae_pgd_cachep; +extern void *kmem_cache_alloc(struct kmem_cache_s *, int); +extern void kmem_cache_free(struct kmem_cache_s *, void *); + + +static inline pgd_t *get_pgd_slow(void) +{ + int i; + pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); + + if (pgd) { + for (i = 0; i < USER_PTRS_PER_PGD; i++) { + unsigned long pmd = __get_free_page(GFP_KERNEL); + if (!pmd) + goto out_oom; + clear_page(pmd); + set_pgd(pgd + i, __pgd(1 + __pa(pmd))); + } + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + return pgd; +out_oom: + for (i--; i >= 0; i--) + free_page((unsigned long)__va(pgd_val(pgd[i])-1)); + kmem_cache_free(pae_pgd_cachep, pgd); + return NULL; +} + +#else + +static inline pgd_t *get_pgd_slow(void) +{ + pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); + + if (pgd) { + memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + __make_page_readonly(pgd); + queue_pgd_pin(__pa(pgd)); + } + return pgd; +} + +#endif /* CONFIG_X86_PAE */ + +static inline pgd_t *get_pgd_fast(void) +{ + unsigned long ret; + + if ( !QUICKLIST_EMPTY(pgd_quicklist) ) { + ret = *(--pgd_quicklist); + pgtable_cache_size--; + + } else + ret = (unsigned long)get_pgd_slow(); + return (pgd_t *)ret; +} + +static inline void free_pgd_slow(pgd_t *pgd) +{ +#if defined(CONFIG_X86_PAE) +#error + int i; + + for (i = 0; i < USER_PTRS_PER_PGD; i++) + free_page((unsigned long)__va(pgd_val(pgd[i])-1)); + kmem_cache_free(pae_pgd_cachep, pgd); +#else + queue_pgd_unpin(__pa(pgd)); + __make_page_writable(pgd); + free_page((unsigned long)pgd); +#endif +} + +static inline void free_pgd_fast(pgd_t *pgd) +{ + if ( !QUICKLIST_FULL(pgd_quicklist) ) { + *(pgd_quicklist++) = (unsigned long)pgd; + 
pgtable_cache_size++; + } else + free_pgd_slow(pgd); +} + +static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + + pte = (pte_t *) __get_free_page(GFP_KERNEL); + if (pte) + { + clear_page(pte); + __make_page_readonly(pte); + queue_pte_pin(__pa(pte)); + } + return pte; + +} + +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) +{ + unsigned long ret = 0; + if ( !QUICKLIST_EMPTY(pte_quicklist) ) { + ret = *(--pte_quicklist); + pgtable_cache_size--; + } + return (pte_t *)ret; +} + +static __inline__ void pte_free_slow(pte_t *pte) +{ + queue_pte_unpin(__pa(pte)); + __make_page_writable(pte); + free_page((unsigned long)pte); +} + +static inline void pte_free_fast(pte_t *pte) +{ + if ( !QUICKLIST_FULL(pte_quicklist) ) { + *(pte_quicklist++) = (unsigned long)pte; + pgtable_cache_size++; + } else + pte_free_slow(pte); +} + +#define pte_free(pte) pte_free_fast(pte) +#define pgd_free(pgd) free_pgd_fast(pgd) +#define pgd_alloc(mm) get_pgd_fast() + +/* + * allocating and freeing a pmd is trivial: the 1-entry pmd is + * inside the pgd, so has no extra memory associated with it. + * (In the PAE case we free the pmds as part of the pgd.) + */ + +#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) +#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_free_slow(x) do { } while (0) +#define pmd_free_fast(x) do { } while (0) +#define pmd_free(x) do { } while (0) +#define pgd_populate(mm, pmd, pte) BUG() + +extern int do_check_pgt_cache(int, int); + +/* + * TLB flushing: + * + * - flush_tlb() flushes the current mm struct TLBs + * - flush_tlb_all() flushes all processes TLBs + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(mm, start, end) flushes a range of pages + * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables + * + * ..but the i386 has somewhat limited tlb flushing capabilities, + * and page-granular flushes are available only on i486 and up. + */ + +#ifndef CONFIG_SMP + +#define flush_tlb() __flush_tlb() +#define flush_tlb_all() __flush_tlb_all() +#define local_flush_tlb() __flush_tlb() + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + if (mm == current->active_mm) queue_tlb_flush(); + XEN_flush_page_update_queue(); +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + if (vma->vm_mm == current->active_mm) queue_invlpg(addr); + XEN_flush_page_update_queue(); +} + +static inline void flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm == current->active_mm) queue_tlb_flush(); + XEN_flush_page_update_queue(); +} + +#else +#error no guestos SMP support yet... 
+#include <asm/smp.h> + +#define local_flush_tlb() \ + __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) +{ + flush_tlb_mm(mm); +} + +#define TLBSTATE_OK 1 +#define TLBSTATE_LAZY 2 + +struct tlb_state +{ + struct mm_struct *active_mm; + int state; +} ____cacheline_aligned; +extern struct tlb_state cpu_tlbstate[NR_CPUS]; + +#endif /* CONFIG_SMP */ + +static inline void flush_tlb_pgtables(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* i386 does not keep any page table caches in TLB */ + XEN_flush_page_update_queue(); +} + +/* + * NB. The 'domid' field should be zero if mapping I/O space (non RAM). + * Otherwise it identifies the owner of the memory that is being mapped. + */ +extern int direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long machine_addr, + unsigned long size, + pgprot_t prot, + domid_t domid); + +extern int __direct_remap_area_pages(struct mm_struct *mm, + unsigned long address, + unsigned long size, + mmu_update_t *v); + + + +#endif /* _I386_PGALLOC_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/pgtable-2level.h b/linux-2.4.28-xen-sparse/include/asm-xen/pgtable-2level.h new file mode 100644 index 0000000000..9ddd30bf73 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/pgtable-2level.h @@ -0,0 +1,103 @@ +#ifndef _I386_PGTABLE_2LEVEL_H +#define _I386_PGTABLE_2LEVEL_H + +/* + * traditional i386 two-level paging structure: + */ + +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +/* + * the i386 is two-level, so we don't really have any + * PMD directory physically. + */ +#define PMD_SHIFT 22 +#define PTRS_PER_PMD 1 + +#define PTRS_PER_PTE 1024 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +#define pgd_clear(xp) do { } while (0) + +#define set_pte(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low) +#define set_pte_atomic(pteptr, pteval) queue_l1_entry_update(pteptr, (pteval).pte_low) +#define set_pmd(pmdptr, pmdval) queue_l2_entry_update((pmdptr), (pmdval).pmd) +#define set_pgd(pgdptr, pgdval) ((void)0) + +#define pgd_page(pgd) \ +((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +{ + return (pmd_t *) dir; +} + +#define pte_same(a, b) ((a).pte_low == (b).pte_low) + +/* + * We detect special mappings in one of two ways: + * 1. If the MFN is an I/O page then Xen will set the m2p entry + * to be outside our maximum possible pseudophys range. + * 2. If the MFN belongs to a different domain then we will certainly + * not have MFN in our p2m table. Conversely, if the page is ours, + * then we'll have p2m(m2p(MFN))==MFN. 
+ * If we detect a special mapping then it doesn't have a 'struct page'. + * We force !VALID_PAGE() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the high bit gets shifted out + * (e.g., phys_to_machine()) so behaviour there is correct. + */ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME(_m) ((_m) | (1UL<<((sizeof(unsigned long)*8)-1))) +#define pte_page(_pte) \ +({ \ + unsigned long mfn = (_pte).pte_low >> PAGE_SHIFT; \ + unsigned long pfn = mfn_to_pfn(mfn); \ + if ( (pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn) ) \ + pfn = max_mapnr; /* specia: force !VALID_PAGE() */ \ + &mem_map[pfn]; \ +}) + +#define pte_none(x) (!(x).pte_low) +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) + +/* + * A note on implementation of this atomic 'get-and-clear' operation. + * This is actually very simple because XenoLinux can only run on a single + * processor. Therefore, we cannot race other processors setting the 'accessed' + * or 'dirty' bits on a page-table entry. + * Even if pages are shared between domains, that is not a problem because + * each domain will have separate page tables, with their own versions of + * accessed & dirty state. + */ +static inline pte_t ptep_get_and_clear(pte_t *xp) +{ + pte_t pte = *xp; + if ( !pte_none(pte) ) + queue_l1_entry_update(xp, 0); + return pte; +} + +#endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/pgtable.h b/linux-2.4.28-xen-sparse/include/asm-xen/pgtable.h new file mode 100644 index 0000000000..d3ece3a2a5 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/pgtable.h @@ -0,0 +1,370 @@ +#ifndef _I386_PGTABLE_H +#define _I386_PGTABLE_H + +#include <linux/config.h> + +/* + * The Linux memory management assumes a three-level page table setup. On + * the i386, we use that, but "fold" the mid level into the top-level page + * table, so that we physically have the same two-level page table as the + * i386 mmu expects. + * + * This file contains the functions and defines necessary to modify and use + * the i386 page table tree. + */ +#ifndef __ASSEMBLY__ +#include <asm/processor.h> +#include <asm/hypervisor.h> +#include <linux/threads.h> +#include <asm/fixmap.h> + +#ifndef _I386_BITOPS_H +#include <asm/bitops.h> +#endif + +#define swapper_pg_dir 0 +extern void paging_init(void); + +/* Caches aren't brain-dead on the intel. 
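[Editorial aside] Before moving on: the pte_page() validity test in pgtable-2level.h above (is p2m(m2p(MFN)) == MFN, with the PFN inside our reservation?) is easy to get wrong, so here is a toy user-space model of the cross-check. The table contents below are invented purely for illustration; only the check itself mirrors the macro:

#include <stdio.h>

#define MAX_PFN          4                 /* size of our pseudophys reservation */
#define FOREIGN_FRAME(m) ((m) | (1UL << (sizeof(unsigned long) * 8 - 1)))

static unsigned long p2m[MAX_PFN] = { 7, 9, 12, 3 };          /* pfn -> mfn */
static unsigned long m2p[16]      = { [7] = 0, [9] = 1, [12] = 2, [3] = 3,
                                      [5] = 2 /* foreign page, aliases pfn 2 */ };

static int frame_is_ours(unsigned long mfn)
{
    unsigned long pfn = m2p[mfn];
    return pfn < MAX_PFN && p2m[pfn] == mfn;   /* p2m(m2p(mfn)) == mfn? */
}

int main(void)
{
    printf("mfn 9: ours=%d\n", frame_is_ours(9));   /* 1: it is our pfn 1    */
    printf("mfn 5: ours=%d\n", frame_is_ours(5));   /* 0: foreign, no alias  */
    printf("foreign marker for mfn 5: %#lx\n", FOREIGN_FRAME(5UL)); /* top bit set */
    return 0;
}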
*/ +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_range(mm, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr) do { } while (0) +#define flush_page_to_ram(page) do { } while (0) +#define flush_dcache_page(page) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) + +extern unsigned long pgkern_mask; + +#define __flush_tlb() ({ queue_tlb_flush(); XEN_flush_page_update_queue(); }) +#define __flush_tlb_global() __flush_tlb() +#define __flush_tlb_all() __flush_tlb_global() +#define __flush_tlb_one(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); }) +#define __flush_tlb_single(addr) ({ queue_invlpg(addr); XEN_flush_page_update_queue(); }) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[1024]; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +#endif /* !__ASSEMBLY__ */ + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifndef __ASSEMBLY__ +#if CONFIG_X86_PAE +# include <asm/pgtable-3level.h> + +/* + * Need to initialise the X86 PAE caches + */ +extern void pgtable_cache_init(void); + +#else +# include <asm/pgtable-2level.h> + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +#endif +#endif + +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define FIRST_USER_PGD_NR 0 + +#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) +#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) + +#define TWOLEVEL_PGDIR_SHIFT 22 +#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) +#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) + + +#ifndef __ASSEMBLY__ +/* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */ +#define VMALLOC_OFFSET (4*1024*1024) +extern void * high_memory; +#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ + ~(VMALLOC_OFFSET-1)) +#define VMALLOC_VMADDR(x) ((unsigned long)(x)) +#if CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#endif + +#define _PAGE_BIT_PRESENT 0 +#define _PAGE_BIT_RW 1 +#define _PAGE_BIT_USER 2 +#define _PAGE_BIT_PWT 3 +#define _PAGE_BIT_PCD 4 +#define _PAGE_BIT_ACCESSED 5 +#define _PAGE_BIT_DIRTY 6 +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ + +#define _PAGE_PRESENT 0x001 +#define _PAGE_RW 0x002 +#define _PAGE_USER 0x004 +#define _PAGE_PWT 0x008 +#define _PAGE_PCD 0x010 +#define _PAGE_ACCESSED 0x020 +#define _PAGE_DIRTY 0x040 +#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. 
*/ +#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ + +#define _PAGE_PROTNONE 0x080 /* If not present */ + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) + +#define __PAGE_KERNEL \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define __PAGE_KERNEL_NOCACHE \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED) +#define __PAGE_KERNEL_RO \ + (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) + +#if 0 +#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL) +#else +#define MAKE_GLOBAL(x) __pgprot(x) +#endif + +#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL) +#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE) + +/* + * The i386 can't do page protection for execute, and considers that + * the same are read. Also, write permissions imply read permissions. + * This is the closest we can get.. + */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY +#define __P101 PAGE_READONLY +#define __P110 PAGE_COPY +#define __P111 PAGE_COPY + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY +#define __S101 PAGE_READONLY +#define __S110 PAGE_SHARED +#define __S111 PAGE_SHARED + +#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) +#define pte_clear(xp) queue_l1_entry_update(xp, 0) + +#define pmd_none(x) (!(x).pmd) +#define pmd_present(x) ((x).pmd & _PAGE_PRESENT) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pmd_bad(x) (((x).pmd & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + + +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
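[Editorial aside] A note on the __P000..__S111 rows above: they feed the generic protection_map[] table (built in mm/mmap.c, not part of this header), indexed by the low VM_READ/VM_WRITE/VM_EXEC/VM_SHARED flag bits. The small stand-alone illustration below assumes that layout and shows how execute permission collapses to read on i386:

#include <stdio.h>

/* index = VM_READ(1) | VM_WRITE(2) | VM_EXEC(4) | VM_SHARED(8); the row
 * contents mirror the __P000..__S111 definitions above */
enum prot { P_NONE, P_READONLY, P_COPY, P_SHARED };

static const enum prot protection_map[16] = {
    /* private */ P_NONE,     P_READONLY, P_COPY,   P_COPY,
                  P_READONLY, P_READONLY, P_COPY,   P_COPY,
    /* shared  */ P_NONE,     P_READONLY, P_SHARED, P_SHARED,
                  P_READONLY, P_READONLY, P_SHARED, P_SHARED,
};

int main(void)
{
    printf("private exec-only  (idx 4)  -> %d (P_READONLY)\n", protection_map[4]);
    printf("shared  read+write (idx 11) -> %d (P_SHARED)\n",   protection_map[11]);
    return 0;
}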
+ */ +static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_exec(pte_t pte) { return (pte).pte_low & _PAGE_USER; } +static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; } + +static inline pte_t pte_rdprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_exprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_USER; return pte; } +static inline pte_t pte_mkclean(pte_t pte) { (pte).pte_low &= ~_PAGE_DIRTY; return pte; } +static inline pte_t pte_mkold(pte_t pte) { (pte).pte_low &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) { (pte).pte_low &= ~_PAGE_RW; return pte; } +static inline pte_t pte_mkread(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; } + +static inline int ptep_test_and_clear_dirty(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + int ret = pteval & _PAGE_DIRTY; + if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_DIRTY); + return ret; +} +static inline int ptep_test_and_clear_young(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + int ret = pteval & _PAGE_ACCESSED; + if ( ret ) queue_l1_entry_update(ptep, pteval & ~_PAGE_ACCESSED); + return ret; +} +static inline void ptep_set_wrprotect(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + if ( (pteval & _PAGE_RW) ) + queue_l1_entry_update(ptep, pteval & ~_PAGE_RW); +} +static inline void ptep_mkdirty(pte_t *ptep) +{ + unsigned long pteval = *(unsigned long *)ptep; + if ( !(pteval & _PAGE_DIRTY) ) + queue_l1_entry_update(ptep, pteval | _PAGE_DIRTY); +} + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ + +#define mk_pte(page, pgprot) __mk_pte((page) - mem_map, (pgprot)) + +/* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot) __mk_pte((physpage) >> PAGE_SHIFT, pgprot) + +static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +{ + pte.pte_low &= _PAGE_CHG_MASK; + pte.pte_low |= pgprot_val(newprot); + return pte; +} + +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pmd_page(pmd) \ +((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + +/* to find an entry in a page-table-directory. */ +#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + +#define __pgd_offset(address) pgd_index(address) + +#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +#define __pmd_offset(address) \ + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) + +/* Find an entry in the third-level page table.. 
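[Editorial aside] pte_modify() above relies on _PAGE_CHG_MASK to preserve the frame number and the accessed/dirty bits while swapping the protection bits. A quick user-space check of that masking, assuming PTE_MASK is the i386 PAGE_MASK value 0xfffff000:

#include <stdio.h>

#define _PAGE_PRESENT  0x001UL
#define _PAGE_RW       0x002UL
#define _PAGE_USER     0x004UL
#define _PAGE_ACCESSED 0x020UL
#define _PAGE_DIRTY    0x040UL
#define PTE_MASK       0xfffff000UL              /* assumed: i386 PAGE_MASK */
#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)

static unsigned long pte_modify_low(unsigned long pte_low, unsigned long newprot)
{
    pte_low &= _PAGE_CHG_MASK;      /* keep frame + accessed/dirty */
    pte_low |= newprot;             /* install the new protection  */
    return pte_low;
}

int main(void)
{
    /* a dirty, accessed, writable user mapping of frame 0x12345 */
    unsigned long pte = (0x12345UL << 12) | _PAGE_PRESENT | _PAGE_RW |
                        _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY;
    unsigned long ro  = pte_modify_low(pte, _PAGE_PRESENT | _PAGE_USER);

    printf("before %#lx  after %#lx\n", pte, ro);  /* 0x12345067 -> 0x12345065 */
    return 0;
}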
*/ +#define __pte_offset(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ + __pte_offset(address)) + +/* + * The i386 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +#define update_mmu_cache(vma,address,pte) do { } while (0) + +/* Encode and de-code a swap entry */ +#define SWP_TYPE(x) (((x).val >> 1) & 0x3f) +#define SWP_OFFSET(x) ((x).val >> 8) +#define SWP_ENTRY(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#define pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) +#define swp_entry_to_pte(x) ((pte_t) { (x).val }) + +struct page; +int change_page_attr(struct page *, int, pgprot_t prot); + +static inline void __make_page_readonly(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); +} + +static inline void __make_page_writable(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); +} + +static inline void make_page_readonly(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); + if ( (unsigned long)va >= VMALLOC_START ) + __make_page_readonly(machine_to_virt( + *(unsigned long *)pte&PAGE_MASK)); +} + +static inline void make_page_writable(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); + if ( (unsigned long)va >= VMALLOC_START ) + __make_page_writable(machine_to_virt( + *(unsigned long *)pte&PAGE_MASK)); +} + +static inline void make_pages_readonly(void *va, unsigned int nr) +{ + while ( nr-- != 0 ) + { + make_page_readonly(va); + va = (void *)((unsigned long)va + PAGE_SIZE); + } +} + +static inline void make_pages_writable(void *va, unsigned int nr) +{ + while ( nr-- != 0 ) + { + make_page_writable(va); + va = (void *)((unsigned long)va + PAGE_SIZE); + } +} + +static inline unsigned long arbitrary_virt_to_phys(void *va) +{ + pgd_t *pgd = pgd_offset_k((unsigned long)va); + pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pte_t *pte = pte_offset(pmd, (unsigned long)va); + unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK; + return pa | ((unsigned long)va & (PAGE_SIZE-1)); +} + +#endif /* !__ASSEMBLY__ */ + +/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +#define PageSkip(page) (0) +#define kern_addr_valid(addr) (1) + +#define io_remap_page_range remap_page_range + +#endif /* _I386_PGTABLE_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/processor.h b/linux-2.4.28-xen-sparse/include/asm-xen/processor.h new file mode 100644 index 0000000000..9036704e23 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/processor.h @@ -0,0 +1,483 @@ +/* + * include/asm-i386/processor.h + * + * Copyright (C) 1994 Linus Torvalds + */ + +#ifndef __ASM_I386_PROCESSOR_H +#define __ASM_I386_PROCESSOR_H + +#include <asm/vm86.h> +#include <asm/math_emu.h> +#include <asm/segment.h> +#include <asm/page.h> +#include 
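[Editorial aside] A note on the swap-entry macros in pgtable.h above: the type sits in bits 1..6 and the offset starts at bit 8, which keeps bit 0 (_PAGE_PRESENT) and bit 7 (_PAGE_PROTNONE) clear in a swapped-out PTE. A round-trip sanity check in user space:

#include <assert.h>
#include <stdio.h>

typedef struct { unsigned long val; } swp_entry_t;

#define SWP_TYPE(x)          (((x).val >> 1) & 0x3f)
#define SWP_OFFSET(x)        ((x).val >> 8)
#define SWP_ENTRY(type, off) ((swp_entry_t){ ((type) << 1) | ((off) << 8) })

int main(void)
{
    swp_entry_t e = SWP_ENTRY(5UL, 0xabcdUL);

    assert(SWP_TYPE(e) == 5 && SWP_OFFSET(e) == 0xabcd);
    assert((e.val & 0x81) == 0);   /* neither _PAGE_PRESENT nor _PAGE_PROTNONE */
    printf("packed entry: %#lx\n", e.val);   /* 0xabcd0a */
    return 0;
}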
<asm/types.h> +#include <asm/sigcontext.h> +#include <asm/cpufeature.h> +#include <linux/cache.h> +#include <linux/config.h> +#include <linux/threads.h> + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#define current_text_addr() ({ void *pc; __asm__("movl $1f,%0\n1:":"=g" (pc)); pc; }) + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. [mj] + */ + +struct cpuinfo_x86 { + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + char wp_works_ok; /* It doesn't on 386's */ + char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ + char hard_math; + char rfu; + int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ + __u32 x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this + call */ + int fdiv_bug; + int f00f_bug; + int coma_bug; + unsigned long loops_per_jiffy; + unsigned long *pgd_quick; + unsigned long *pmd_quick; + unsigned long *pte_quick; + unsigned long pgtable_cache_sz; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define X86_VENDOR_INTEL 0 +#define X86_VENDOR_CYRIX 1 +#define X86_VENDOR_AMD 2 +#define X86_VENDOR_UMC 3 +#define X86_VENDOR_NEXGEN 4 +#define X86_VENDOR_CENTAUR 5 +#define X86_VENDOR_RISE 6 +#define X86_VENDOR_TRANSMETA 7 +#define X86_VENDOR_NSC 8 +#define X86_VENDOR_SIS 9 +#define X86_VENDOR_UNKNOWN 0xff + +/* + * capabilities of CPUs + */ + +extern struct cpuinfo_x86 boot_cpu_data; +extern struct tss_struct init_tss[NR_CPUS]; + +#ifdef CONFIG_SMP +extern struct cpuinfo_x86 cpu_data[]; +#define current_cpu_data cpu_data[smp_processor_id()] +#else +#define cpu_data (&boot_cpu_data) +#define current_cpu_data boot_cpu_data +#endif + +extern char ignore_irq13; + +extern void identify_cpu(struct cpuinfo_x86 *); +extern void print_cpu_info(struct cpuinfo_x86 *); + +/* + * EFLAGS bits + */ +#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ +#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ +#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ +#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ +#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ +#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ +#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ +#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ +#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ +#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ +#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ +#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ +#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ +#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ +#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + +/* + * Generic CPUID function + */ +static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op)); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax; + + __asm__("cpuid" + : "=a" (eax) + : "0" (op) + : "bx", "cx", "dx"); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx; + + __asm__("cpuid" + 
: "=a" (eax), "=b" (ebx) + : "0" (op) + : "cx", "dx" ); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ecx; + + __asm__("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (op) + : "bx", "dx" ); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx; + + __asm__("cpuid" + : "=a" (eax), "=d" (edx) + : "0" (op) + : "bx", "cx"); + return edx; +} + +/* + * Intel CPU features in CR4 + */ +#define X86_CR4_VME 0x0001 /* enable vm86 extensions */ +#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */ +#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */ +#define X86_CR4_DE 0x0008 /* enable debugging extensions */ +#define X86_CR4_PSE 0x0010 /* enable page size extensions */ +#define X86_CR4_PAE 0x0020 /* enable physical address extensions */ +#define X86_CR4_MCE 0x0040 /* Machine check enable */ +#define X86_CR4_PGE 0x0080 /* enable global pages */ +#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */ +#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */ +#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */ + +#define load_cr3(pgdir) \ + asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))); + +extern unsigned long mmu_cr4_features; + +#include <asm/hypervisor.h> + +static inline void set_in_cr4 (unsigned long mask) +{ + BUG(); +} + +static inline void clear_in_cr4 (unsigned long mask) +{ + BUG(); +} + +/* + * Cyrix CPU configuration register indexes + */ +#define CX86_CCR0 0xc0 +#define CX86_CCR1 0xc1 +#define CX86_CCR2 0xc2 +#define CX86_CCR3 0xc3 +#define CX86_CCR4 0xe8 +#define CX86_CCR5 0xe9 +#define CX86_CCR6 0xea +#define CX86_CCR7 0xeb +#define CX86_DIR0 0xfe +#define CX86_DIR1 0xff +#define CX86_ARR_BASE 0xc4 +#define CX86_RCR_BASE 0xdc + +/* + * Cyrix CPU indexed register access macros + */ + +#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); }) + +#define setCx86(reg, data) do { \ + outb((reg), 0x22); \ + outb((data), 0x23); \ +} while (0) + +/* + * Bus types (default is ISA, but people can check others with these..) + */ +#ifdef CONFIG_EISA +extern int EISA_bus; +#else +#define EISA_bus (0) +#endif +extern int MCA_bus; + +/* from system description table in BIOS. Mostly for MCA use, but +others may find it useful. */ +extern unsigned int machine_id; +extern unsigned int machine_submodel_id; +extern unsigned int BIOS_revision; +extern unsigned int mca_pentium_flag; + +/* + * User space process size: 3GB (default). + */ +#define TASK_SIZE (PAGE_OFFSET) + +/* This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (TASK_SIZE / 3) + +/* + * Size of io_bitmap in longwords: 32 is ports 0-0x3ff. 
+ */ +#define IO_BITMAP_SIZE 32 +#define IO_BITMAP_BYTES (IO_BITMAP_SIZE * 4) +#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap) +#define INVALID_IO_BITMAP_OFFSET 0x8000 + +struct i387_fsave_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + long status; /* software status information */ +}; + +struct i387_fxsave_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +} __attribute__ ((aligned (16))); + +struct i387_soft_struct { + long cwd; + long swd; + long twd; + long fip; + long fcs; + long foo; + long fos; + long st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ + unsigned char ftop, changed, lookahead, no_update, rm, alimit; + struct info *info; + unsigned long entry_eip; +}; + +union i387_union { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; +}; + +typedef struct { + unsigned long seg; +} mm_segment_t; + +struct tss_struct { + unsigned short back_link,__blh; + unsigned long esp0; + unsigned short ss0,__ss0h; + unsigned long esp1; + unsigned short ss1,__ss1h; + unsigned long esp2; + unsigned short ss2,__ss2h; + unsigned long __cr3; + unsigned long eip; + unsigned long eflags; + unsigned long eax,ecx,edx,ebx; + unsigned long esp; + unsigned long ebp; + unsigned long esi; + unsigned long edi; + unsigned short es, __esh; + unsigned short cs, __csh; + unsigned short ss, __ssh; + unsigned short ds, __dsh; + unsigned short fs, __fsh; + unsigned short gs, __gsh; + unsigned short ldt, __ldth; + unsigned short trace, bitmap; + unsigned long io_bitmap[IO_BITMAP_SIZE+1]; + /* + * pads the TSS to be cacheline-aligned (size is 0x100) + */ + unsigned long __cacheline_filler[5]; +}; + +struct thread_struct { + unsigned long esp0; + unsigned long eip; + unsigned long esp; + unsigned long fs; + unsigned long gs; + unsigned int io_pl; +/* Hardware debugging registers */ + unsigned long debugreg[8]; /* %%db0-7 debug registers */ +/* fault info */ + unsigned long cr2, trap_no, error_code; +/* floating point info */ + union i387_union i387; +/* virtual 86 mode info */ + struct vm86_struct * vm86_info; + unsigned long screen_bitmap; + unsigned long v86flags, v86mask, saved_esp0; +}; + +#define INIT_THREAD { sizeof(init_stack) + (long) &init_stack, \ + 0, 0, 0, 0, 0, 0, {0}, 0, 0, 0, {{0}}, 0, 0, 0, 0, 0 } + +#define INIT_TSS { \ + 0,0, /* back_link, __blh */ \ + sizeof(init_stack) + (long) &init_stack, /* esp0 */ \ + __KERNEL_DS, 0, /* ss0 */ \ + 0,0,0,0,0,0, /* stack1, stack2 */ \ + 0, /* cr3 */ \ + 0,0, /* eip,eflags */ \ + 0,0,0,0, /* eax,ecx,edx,ebx */ \ + 0,0,0,0, /* esp,ebp,esi,edi */ \ + 0,0,0,0,0,0, /* es,cs,ss */ \ + 0,0,0,0,0,0, /* ds,fs,gs */ \ + 0,0, /* ldt */ \ + 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ + {~0, } /* ioperm */ \ +} + +#define start_thread(regs, new_eip, new_esp) do { \ + __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ + set_fs(USER_DS); \ + regs->xds = __USER_DS; \ + regs->xes = __USER_DS; \ + regs->xss = __USER_DS; \ + regs->xcs = __USER_CS; \ + regs->eip = new_eip; \ + regs->esp = new_esp; \ +} while (0) + +/* Forward declaration, a strange C thing */ +struct task_struct; +struct mm_struct; + +/* Free all 
resources held by a thread. */ +extern void release_thread(struct task_struct *); +/* + * create a kernel thread without removing it from tasklists + */ +extern int arch_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +/* Copy and release all segment info associated with a VM + * Unusable due to lack of error handling, use {init_new,destroy}_context + * instead. + */ +static inline void copy_segments(struct task_struct *p, struct mm_struct * mm) { } +static inline void release_segments(struct mm_struct * mm) { } + +/* + * Return saved PC of a blocked thread. + */ +static inline unsigned long thread_saved_pc(struct thread_struct *t) +{ + return ((unsigned long *)t->esp)[3]; +} + +unsigned long get_wchan(struct task_struct *p); +#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019]) +#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022]) + +#define THREAD_SIZE (2*PAGE_SIZE) +#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1)) +#define free_task_struct(p) free_pages((unsigned long) (p), 1) +#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) + +#define init_task (init_task_union.task) +#define init_stack (init_task_union.stack) + +struct microcode { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int reserved[5]; + unsigned int bits[500]; +}; + +/* '6' because it used to be for P6 only (but now covers Pentium 4 as well) */ +#define MICROCODE_IOCFREE _IO('6',0) + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__("rep;nop" ::: "memory"); +} + +#define cpu_relax() rep_nop() + +/* Prefetch instructions for Pentium III and AMD Athlon */ +#if defined(CONFIG_MPENTIUMIII) || defined (CONFIG_MPENTIUM4) + +#define ARCH_HAS_PREFETCH +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetchnta (%0)" : : "r"(x)); +} + +#elif CONFIG_X86_USE_3DNOW + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define ARCH_HAS_SPINLOCK_PREFETCH + +extern inline void prefetch(const void *x) +{ + __asm__ __volatile__ ("prefetch (%0)" : : "r"(x)); +} + +extern inline void prefetchw(const void *x) +{ + __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x)); +} +#define spin_lock_prefetch(x) prefetchw(x) + +#endif + +#endif /* __ASM_I386_PROCESSOR_H */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/queues.h b/linux-2.4.28-xen-sparse/include/asm-xen/queues.h new file mode 100644 index 0000000000..dd527603ce --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/queues.h @@ -0,0 +1,20 @@ + +/* Work-queue emulation over task queues. Pretty simple. 
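[Editorial aside] Returning briefly to processor.h above: KSTK_EIP() and KSTK_ESP() index fixed word slots near the top of the two-page task_struct/kernel-stack union, which is where the saved user-mode eip and esp of the entry-time pt_regs frame normally live. The arithmetic, checked in user space with an invented base address and the 4-byte longs of i386:

#include <stdio.h>

int main(void)
{
    unsigned long tsk       = 0xc1234000UL;      /* invented task_struct base */
    unsigned long stack_top = tsk + 2 * 4096;    /* THREAD_SIZE = two pages   */
    unsigned long eip_slot  = tsk + 4096 + 1019 * 4;   /* 4-byte longs, i386  */
    unsigned long esp_slot  = tsk + 4096 + 1022 * 4;

    printf("KSTK_EIP slot %#lx = stack top - %lu\n", eip_slot, stack_top - eip_slot);
    printf("KSTK_ESP slot %#lx = stack top - %lu\n", esp_slot, stack_top - esp_slot);
    /* prints "- 20" and "- 8": the saved user eip/esp slots of the pt_regs frame */
    return 0;
}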
*/ + +#ifndef __QUEUES_H__ +#define __QUEUES_H__ + +#include <linux/version.h> +#include <linux/list.h> +#include <linux/tqueue.h> + +#define DECLARE_TQUEUE(_name, _fn, _arg) \ + struct tq_struct _name = { LIST_HEAD_INIT((_name).list), 0, _fn, _arg } +#define DECLARE_WORK(_name, _fn, _arg) DECLARE_TQUEUE(_name, _fn, _arg) + +#define work_struct tq_struct +#define INIT_WORK(_work, _fn, _arg) INIT_TQUEUE(_work, _fn, _arg) + +#define schedule_work(_w) schedule_task(_w) + +#endif /* __QUEUES_H__ */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/segment.h b/linux-2.4.28-xen-sparse/include/asm-xen/segment.h new file mode 100644 index 0000000000..5b3a4392f3 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/segment.h @@ -0,0 +1,15 @@ +#ifndef _ASM_SEGMENT_H +#define _ASM_SEGMENT_H + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#endif +#include <asm-xen/xen-public/xen.h> + +#define __KERNEL_CS FLAT_RING1_CS +#define __KERNEL_DS FLAT_RING1_DS + +#define __USER_CS FLAT_RING3_CS +#define __USER_DS FLAT_RING3_DS + +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/smp.h b/linux-2.4.28-xen-sparse/include/asm-xen/smp.h new file mode 100644 index 0000000000..025a93a890 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/smp.h @@ -0,0 +1,102 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + +/* + * We need the APIC definitions automatically as part of 'smp.h' + */ +#ifndef __ASSEMBLY__ +#include <linux/config.h> +#include <linux/threads.h> +#include <linux/ptrace.h> +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +#ifndef __ASSEMBLY__ +#include <asm/bitops.h> +#include <asm/mpspec.h> +#ifdef CONFIG_X86_IO_APIC +#include <asm/io_apic.h> +#endif +#include <asm/apic.h> +#endif +#endif + +#ifdef CONFIG_SMP +#ifndef __ASSEMBLY__ + +/* + * Private routines/data + */ + +extern void smp_alloc_memory(void); +extern unsigned long phys_cpu_present_map; +extern unsigned long cpu_online_map; +extern volatile unsigned long smp_invalidate_needed; +extern int pic_mode; +extern int smp_num_siblings; +extern int cpu_sibling_map[]; + +extern void smp_flush_tlb(void); +extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); +extern void fastcall smp_send_reschedule(int cpu); +extern void smp_invalidate_rcv(void); /* Process an NMI */ +extern void (*mtrr_hook) (void); +extern void zap_low_mappings (void); + +/* + * On x86 all CPUs are mapped 1:1 to the APIC space. + * This simplifies scheduling and IPI sending and + * compresses data structures. + */ +static inline int cpu_logical_map(int cpu) +{ + return cpu; +} +static inline int cpu_number_map(int cpu) +{ + return cpu; +} + +/* + * Some lowlevel functions might want to know about + * the real APIC ID <-> CPU # mapping. + */ +#define MAX_APICID 256 +extern volatile int cpu_to_physical_apicid[NR_CPUS]; +extern volatile int physical_apicid_to_cpu[MAX_APICID]; +extern volatile int cpu_to_logical_apicid[NR_CPUS]; +extern volatile int logical_apicid_to_cpu[MAX_APICID]; + +/* + * General functions that each host system must provide. + */ + +extern void smp_boot_cpus(void); +extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ + +/* + * This function is needed by all SMP systems. It must _always_ be valid + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. 
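[Editorial aside] Back to asm-xen/queues.h above: the shim lets 2.6-style work-queue code run on the 2.4 task-queue API. A hedged usage sketch; the handler name and calling context are invented, only the macros come from the header:

#include <asm/queues.h>                 /* the emulation header above */

/* deferred handler: runs from keventd, outside interrupt context */
static void frontend_deferred_work(void *unused)
{
    /* ... process ring responses, re-enable the device, etc. ... */
}

static DECLARE_WORK(frontend_work, frontend_deferred_work, NULL);

static void frontend_interrupt_path(void)
{
    /* cheap from IRQ/event context: only queues the underlying tq_struct */
    schedule_work(&frontend_work);
}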
+ */ + +#define smp_processor_id() (current->processor) + +#endif /* !__ASSEMBLY__ */ + +#define NO_PROC_ID 0xFF /* No processor magic marker */ + +/* + * This magic constant controls our willingness to transfer + * a process across CPUs. Such a transfer incurs misses on the L1 + * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My + * gut feeling is this will vary by board in value. For a board + * with separate L2 cache it probably depends also on the RSS, and + * for a board with shared L2 cache it ought to decay fast as other + * processes are run. + */ + +#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ + +#endif +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/synch_bitops.h b/linux-2.4.28-xen-sparse/include/asm-xen/synch_bitops.h new file mode 100644 index 0000000000..8093de0ac9 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/synch_bitops.h @@ -0,0 +1,83 @@ +#ifndef __XEN_SYNCH_BITOPS_H__ +#define __XEN_SYNCH_BITOPS_H__ + +/* + * Copyright 1992, Linus Torvalds. + * Heavily modified to provide guaranteed strong synchronisation + * when communicating with Xen or other guest OSes running on other CPUs. + */ + +#include <linux/config.h> + +#define ADDR (*(volatile long *) addr) + +static __inline__ void synch_set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btsl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btrl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btcl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__ ( + "lock btcl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int synch_var_test_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "btl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); + return oldbit; +} + +#define synch_test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + synch_const_test_bit((nr),(addr)) : \ + synch_var_test_bit((nr),(addr))) + +#endif /* __XEN_SYNCH_BITOPS_H__ */ diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/system.h b/linux-2.4.28-xen-sparse/include/asm-xen/system.h new file mode 100644 index 0000000000..9be6e291fe --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/system.h @@ -0,0 +1,408 @@ +#ifndef __ASM_SYSTEM_H +#define __ASM_SYSTEM_H + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <asm/synch_bitops.h> +#include <asm/segment.h> +#include <asm/hypervisor.h> +#include <asm/evtchn.h> + +#ifdef __KERNEL__ + +struct task_struct; +extern void FASTCALL(__switch_to(struct task_struct *prev, + struct task_struct *next)); + +#define prepare_to_switch() \ +do { \ + struct thread_struct *__t = ¤t->thread; \ + __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (*(int *)&__t->fs) ); \ + __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (*(int *)&__t->gs) ); \ +} while (0) +#define switch_to(prev,next,last) do { \ + asm volatile("pushl %%esi\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%ebp\n\t" \ + "movl %%esp,%0\n\t" /* save ESP */ \ + "movl %3,%%esp\n\t" /* restore ESP */ \ + "movl $1f,%1\n\t" /* save EIP */ \ + "pushl %4\n\t" /* restore EIP */ \ + "jmp __switch_to\n" \ + "1:\t" \ + "popl %%ebp\n\t" \ + "popl %%edi\n\t" \ + "popl %%esi\n\t" \ + :"=m" (prev->thread.esp),"=m" (prev->thread.eip), \ + "=b" (last) \ + :"m" (next->thread.esp),"m" (next->thread.eip), \ + "a" (prev), "d" (next), \ + "b" (prev)); \ +} while (0) + +#define _set_base(addr,base) do { unsigned long __pr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %%dl,%2\n\t" \ + "movb %%dh,%3" \ + :"=&d" (__pr) \ + :"m" (*((addr)+2)), \ + "m" (*((addr)+4)), \ + "m" (*((addr)+7)), \ + "0" (base) \ + ); } while(0) + +#define _set_limit(addr,limit) do { unsigned long __lr; \ +__asm__ __volatile__ ("movw %%dx,%1\n\t" \ + "rorl $16,%%edx\n\t" \ + "movb %2,%%dh\n\t" \ + "andb $0xf0,%%dh\n\t" \ + "orb %%dh,%%dl\n\t" \ + "movb %%dl,%2" \ + :"=&d" (__lr) \ + :"m" (*(addr)), \ + "m" (*((addr)+6)), \ + "0" (limit) \ + ); } while(0) + +#define set_base(ldt,base) _set_base( ((char *)&(ldt)) , (base) ) +#define set_limit(ldt,limit) _set_limit( ((char *)&(ldt)) , ((limit)-1)>>12 ) + +static inline unsigned long _get_base(char * addr) +{ + unsigned long __base; + __asm__("movb %3,%%dh\n\t" + "movb %2,%%dl\n\t" + "shll $16,%%edx\n\t" + "movw %1,%%dx" + :"=&d" (__base) + :"m" (*((addr)+2)), + "m" (*((addr)+4)), + "m" (*((addr)+7))); + return __base; +} + +#define get_base(ldt) _get_base( ((char *)&(ldt)) ) + +/* + * Load a segment. Fall back on loading the zero + * segment if something goes wrong.. + */ +#define loadsegment(seg,value) \ + asm volatile("\n" \ + "1:\t" \ + "movl %0,%%" #seg "\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3:\t" \ + "pushl $0\n\t" \ + "popl %%" #seg "\n\t" \ + "jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n\t" \ + ".align 4\n\t" \ + ".long 1b,3b\n" \ + ".previous" \ + : :"m" (*(unsigned int *)&(value))) + +/* NB. 'clts' is done for us by Xen during virtual trap. 
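[Editorial aside] On the synch_bitops.h operations above: they always emit a lock prefix, even on a uniprocessor kernel, because the other party touching the word may be Xen itself or a driver back end running in another domain. A usage sketch follows; the shared structure and the per-bit handler are invented for illustration, only the synch_* call comes from the header:

#include <asm/synch_bitops.h>

/* invented layout: a word of pending bits that the remote end sets */
struct shared_flags {
    unsigned long pending;
};

static void drain_pending(struct shared_flags *sh)
{
    int bit;

    for (bit = 0; bit < 32; bit++) {
        /* lock-prefixed read-modify-write: safe against the other domain
         * (or Xen) setting further bits concurrently */
        if (synch_test_and_clear_bit(bit, &sh->pending)) {
            /* handle_event(bit); -- hypothetical per-bit handler */
        }
    }
}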
*/ +#define clts() ((void)0) +#define stts() (HYPERVISOR_fpu_taskswitch()) + +#endif /* __KERNEL__ */ + +static inline unsigned long get_limit(unsigned long segment) +{ + unsigned long __limit; + __asm__("lsll %1,%0" + :"=r" (__limit):"r" (segment)); + return __limit+1; +} + +#define nop() __asm__ __volatile__ ("nop") + +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) + +#define tas(ptr) (xchg((ptr),1)) + +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((struct __xchg_dummy *)(x)) + + +/* + * The semantics of XCHGCMP8B are a bit strange, this is why + * there is a loop and the loading of %%eax and %%edx has to + * be inside. This inlines well in most cases, the cached + * cost is around ~38 cycles. (in the future we might want + * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that + * might have an implicit FPU-save as a cost, so it's not + * clear which path to go.) + * + * chmxchg8b must be used with the lock prefix here to allow + * the instruction to be executed atomically, see page 3-102 + * of the instruction set reference 24319102.pdf. We need + * the reader side to see the coherent 64bit value. + */ +static inline void __set_64bit (unsigned long long * ptr, + unsigned int low, unsigned int high) +{ + __asm__ __volatile__ ( + "\n1:\t" + "movl (%0), %%eax\n\t" + "movl 4(%0), %%edx\n\t" + "lock cmpxchg8b (%0)\n\t" + "jnz 1b" + : /* no outputs */ + : "D"(ptr), + "b"(low), + "c"(high) + : "ax","dx","memory"); +} + +static inline void __set_64bit_constant (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL)); +} +#define ll_low(x) *(((unsigned int*)&(x))+0) +#define ll_high(x) *(((unsigned int*)&(x))+1) + +static inline void __set_64bit_var (unsigned long long *ptr, + unsigned long long value) +{ + __set_64bit(ptr,ll_low(value), ll_high(value)); +} + +#define set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit_constant(ptr, value) : \ + __set_64bit_var(ptr, value) ) + +#define _set_64bit(ptr,value) \ +(__builtin_constant_p(value) ? \ + __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \ + __set_64bit(ptr, ll_low(value), ll_high(value)) ) + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. 
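[Editorial aside] The xchg()/tas() pair above is the raw test-and-set primitive. The classic (and deliberately naive) lock built from it looks like the sketch below; it relies on tas() from this header and cpu_relax() from processor.h, and a real kernel would of course use spinlock_t instead:

static volatile unsigned long toy_lock_word;

static void toy_lock(void)
{
    while (tas(&toy_lock_word))        /* previous value 0 means we got it */
        while (toy_lock_word)          /* spin read-only while contended   */
            cpu_relax();
}

static void toy_unlock(void)
{
    toy_lock_word = 0;                 /* a plain store releases on x86    */
}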
+ */ + +#ifdef CONFIG_X86_CMPXCHG +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("lock cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__xg(ptr)), "0"(old) + : "memory"); + return prev; + } + return old; +} + +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) + +#else +/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */ +#endif + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + * + * For now, "wmb()" doesn't actually do anything, as all + * Intel CPU's follow what Intel calls a *Processor Order*, + * in which all writes are seen in the program order even + * outside the CPU. + * + * I expect future Intel CPU's to have a weaker ordering, + * but I'd also expect them to finally get their act together + * and add some real memory barriers if so. + * + * Some non intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ + +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() + +#ifdef CONFIG_X86_OOSTORE +#define wmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#else +#define wmb() __asm__ __volatile__ ("": : :"memory") +#endif + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +#define safe_halt() ((void)0) + +/* + * The use of 'barrier' in the following reflects their use as local-lock + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following + * critical operations are executed. All critical operatiosn must complete + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also + * includes these barriers, for example. 
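[Editorial aside] The cmpxchg() defined earlier in this header follows the usual convention: the value actually found in memory is returned, and the update took effect only if that equals the old value passed in. The canonical retry loop, sketched here as a lock-free counter add (helper name invented; relies on the surrounding header context):

static unsigned long counter_add(volatile unsigned long *p, unsigned long delta)
{
    unsigned long old, seen;

    do {
        old  = *p;                            /* snapshot current value   */
        seen = cmpxchg(p, old, old + delta);  /* try to install old+delta */
    } while (seen != old);                    /* lost a race: retry       */

    return old + delta;
}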
+ */ + +#define __cli() \ +do { \ + HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + barrier(); \ + _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define __save_flags(x) \ +do { \ + (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ +} while (0) + +#define __restore_flags(x) \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + barrier(); \ + if ( (_shared->vcpu_data[0].evtchn_upcall_mask = x) == 0 ) { \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ + } \ +} while (0) + +#define __save_and_cli(x) \ +do { \ + (x) = HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask; \ + HYPERVISOR_shared_info->vcpu_data[0].evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __save_and_sti(x) \ +do { \ + shared_info_t *_shared = HYPERVISOR_shared_info; \ + barrier(); \ + (x) = _shared->vcpu_data[0].evtchn_upcall_mask; \ + _shared->vcpu_data[0].evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_shared->vcpu_data[0].evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_set(x) __save_and_sti(x) +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + + +#ifdef CONFIG_SMP +#error no SMP +extern void __global_cli(void); +extern void __global_sti(void); +extern unsigned long __global_save_flags(void); +extern void __global_restore_flags(unsigned long); +#define cli() __global_cli() +#define sti() __global_sti() +#define save_flags(x) ((x)=__global_save_flags()) +#define restore_flags(x) __global_restore_flags(x) +#define save_and_cli(x) do { save_flags(x); cli(); } while(0); +#define save_and_sti(x) do { save_flags(x); sti(); } while(0); + +#else + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) +#define save_and_cli(x) __save_and_cli(x) +#define save_and_sti(x) __save_and_sti(x) + +#endif + +/* + * disable hlt during certain critical i/o operations + */ +#define HAVE_DISABLE_HLT +void disable_hlt(void); +void enable_hlt(void); + +extern unsigned long dmi_broken; +extern int is_sony_vaio_laptop; + +#define BROKEN_ACPI_Sx 0x0001 +#define BROKEN_INIT_AFTER_S1 0x0002 +#define BROKEN_PNP_BIOS 0x0004 + +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/vga.h b/linux-2.4.28-xen-sparse/include/asm-xen/vga.h new file mode 100644 index 0000000000..d0624cf480 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/vga.h @@ -0,0 +1,42 @@ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares <mj@ucw.cz> + */ + +#ifndef _LINUX_ASM_VGA_H_ +#define _LINUX_ASM_VGA_H_ + +#include <asm/io.h> + +extern unsigned char *vgacon_mmap; + +static unsigned long VGA_MAP_MEM(unsigned long x) +{ + if( vgacon_mmap == NULL ) + { + /* This is our first time in this function. This whole thing + is a rather grim hack. We know we're going to get asked + to map a 32KB region between 0xb0000 and 0xb8000 because + that's what VGAs are. We used the boot time permanent + fixed map region, and map it to machine pages. 
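[Editorial aside] On the event-channel flag macros in system.h above: under Xen they never touch EFLAGS.IF; "disabling interrupts" means setting the per-VCPU evtchn_upcall_mask byte in the shared-info page, and unmasking re-checks for events that arrived while masked (hence force_evtchn_callback() in the unmask paths). A usage sketch; the function name and the data it protects are invented:

static void update_shared_state(void)
{
    unsigned long flags;

    local_irq_save(flags);      /* set evtchn_upcall_mask, remember old state */
    /* ... touch data also used by the event-channel callback ... */
    local_irq_restore(flags);   /* clear the mask; if events became pending
                                   while masked, force_evtchn_callback() runs */
}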
+ */ + if( x != 0xb8000 ) + panic("Argghh! VGA Console is weird. 1:%08lx\n",x); + + vgacon_mmap = (unsigned char*) bt_ioremap( 0xa0000, 128*1024 ); + return (unsigned long) (vgacon_mmap+x-0xa0000); + } + else + { + if( x != 0xc0000 && x != 0xa0000 ) /* vidmem_end or charmap fonts */ + panic("Argghh! VGA Console is weird. 2:%08lx\n",x); + return (unsigned long) (vgacon_mmap+x-0xa0000); + } + return 0; +} + +static inline unsigned char vga_readb(unsigned char * x) { return (*(x)); } +static inline void vga_writeb(unsigned char x, unsigned char *y) { *(y) = (x); } + +#endif diff --git a/linux-2.4.28-xen-sparse/include/asm-xen/xor.h b/linux-2.4.28-xen-sparse/include/asm-xen/xor.h new file mode 100644 index 0000000000..9e6cca8a8a --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/asm-xen/xor.h @@ -0,0 +1,879 @@ +/* + * include/asm-i386/xor.h + * + * Optimized RAID-5 checksumming functions for MMX and SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * High-speed RAID5 checksumming functions utilizing MMX instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +#define FPU_SAVE \ + do { \ + if (!(current->flags & PF_USEDFPU)) \ + clts(); \ + __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ + } while (0) + +#define FPU_RESTORE \ + do { \ + __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ + if (!(current->flags & PF_USEDFPU)) \ + stts(); \ + } while (0) + +#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" +#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" +#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" +#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" +#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" +#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" + + +static void +xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + ST(i,0) \ + XO1(i+1,1) \ + ST(i+1,1) \ + XO1(i+2,2) \ + ST(i+2,2) \ + XO1(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + ST(i,0) \ + XO2(i+1,1) \ + ST(i+1,1) \ + XO2(i+2,2) \ + ST(i+2,2) \ + XO2(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_4(unsigned 
long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + ST(i,0) \ + XO3(i+1,1) \ + ST(i+1,1) \ + XO3(i+2,2) \ + ST(i+2,2) \ + XO3(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + FPU_RESTORE; +} + + +static void +xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + /* need to save/restore p4/p5 manually otherwise gcc's 10 argument + limit gets exceeded (+ counts as two arguments) */ + __asm__ __volatile__ ( + " pushl %4\n" + " pushl %5\n" +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + XO4(i,0) \ + ST(i,0) \ + XO4(i+1,1) \ + ST(i+1,1) \ + XO4(i+2,2) \ + ST(i+2,2) \ + XO4(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " addl $128, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + " popl %5\n" + " popl %4\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + FPU_RESTORE; +} + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef BLOCK + +static void +xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq 40(%1), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + 
" pxor 8(%3), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory" ); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor (%4), %%mm0 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " pxor 16(%4), %%mm2 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 24(%4), %%mm3 ;\n" + " movq %%mm3, 24(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq 48(%1), %%mm6 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ + __asm__ __volatile__ ( + " pushl %4\n" + " pushl %5\n" + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor (%4), %%mm0 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor (%5), %%mm0 ;\n" + " pxor 8(%5), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 16(%4), %%mm2 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%5), %%mm2 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%4), %%mm3 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%5), %%mm3 
;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 32(%5), %%mm4 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " pxor 40(%5), %%mm5 ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%5), %%mm6 ;\n" + " pxor 56(%5), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " addl $64, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + " popl %5\n" + " popl %4\n" + : "+g" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + FPU_RESTORE; +} + +static struct xor_block_template xor_block_pII_mmx = { + name: "pII_mmx", + do_2: xor_pII_mmx_2, + do_3: xor_pII_mmx_3, + do_4: xor_pII_mmx_4, + do_5: xor_pII_mmx_5, +}; + +static struct xor_block_template xor_block_p5_mmx = { + name: "p5_mmx", + do_2: xor_p5_mmx_2, + do_3: xor_p5_mmx_3, + do_4: xor_p5_mmx_4, + do_5: xor_p5_mmx_5, +}; + +#undef FPU_SAVE +#undef FPU_RESTORE + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +#define XMMS_SAVE \ + if (!(current->flags & PF_USEDFPU)) \ + clts(); \ + __asm__ __volatile__ ( \ + "movups %%xmm0,(%1) ;\n\t" \ + "movups %%xmm1,0x10(%1) ;\n\t" \ + "movups %%xmm2,0x20(%1) ;\n\t" \ + "movups %%xmm3,0x30(%1) ;\n\t" \ + : "=&r" (cr0) \ + : "r" (xmm_save) \ + : "memory") + +#define XMMS_RESTORE \ + __asm__ __volatile__ ( \ + "sfence ;\n\t" \ + "movups (%1),%%xmm0 ;\n\t" \ + "movups 0x10(%1),%%xmm1 ;\n\t" \ + "movups 0x20(%1),%%xmm2 ;\n\t" \ + "movups 0x30(%1),%%xmm3 ;\n\t" \ + : \ + : "r" (cr0), "r" (xmm_save) \ + : "memory"); \ + if (!(current->flags & PF_USEDFPU)) \ + stts() + +#define ALIGN16 __attribute__((aligned(16))) + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" +#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" +#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" +#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" +#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" +#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" +#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" +#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" +#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" + + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + PF1(i) \ + PF1(i+2) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF0(i+4) \ + PF0(i+6) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); 
+ + XMMS_RESTORE; +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r"(p2), "+r"(p3) + : + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + PF3(i) \ + PF3(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ + __asm__ __volatile__ ( + " pushl %4\n" + " pushl %5\n" +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + PF3(i) \ + PF3(i+2) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + PF4(i) \ + PF4(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + XO4(i,0) \ + XO4(i+1,1) \ + XO4(i+2,2) \ + XO4(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " addl $256, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + " popl %5\n" + " popl %4\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + XMMS_RESTORE; +} + +static struct xor_block_template xor_block_pIII_sse = { + name: "pIII_sse", + do_2: xor_sse_2, + do_3: xor_sse_3, + do_4: xor_sse_4, + do_5: xor_sse_5, +}; + +/* Also try the generic routines. 
*/ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (cpu_has_xmm) \ + xor_speed(&xor_block_pIII_sse); \ + if (md_cpu_has_mmx()) { \ + xor_speed(&xor_block_pII_mmx); \ + xor_speed(&xor_block_p5_mmx); \ + } \ + } while (0) + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) diff --git a/linux-2.4.28-xen-sparse/include/linux/blk.h b/linux-2.4.28-xen-sparse/include/linux/blk.h new file mode 100644 index 0000000000..d032dff7ba --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/linux/blk.h @@ -0,0 +1,409 @@ +#ifndef _BLK_H +#define _BLK_H + +#include <linux/blkdev.h> +#include <linux/locks.h> +#include <linux/config.h> +#include <linux/spinlock.h> + +/* + * Spinlock for protecting the request queue which + * is mucked around with in interrupts on potentially + * multiple CPU's.. + */ +extern spinlock_t io_request_lock; + +/* + * Initialization functions. + */ +extern int isp16_init(void); +extern int cdu31a_init(void); +extern int acsi_init(void); +extern int mcd_init(void); +extern int mcdx_init(void); +extern int sbpcd_init(void); +extern int aztcd_init(void); +extern int sony535_init(void); +extern int gscd_init(void); +extern int cm206_init(void); +extern int optcd_init(void); +extern int sjcd_init(void); +extern int cdi_init(void); +extern int hd_init(void); +extern int ide_init(void); +extern int xd_init(void); +extern int mfm_init(void); +extern int loop_init(void); +extern int md_init(void); +extern int ap_init(void); +extern int ddv_init(void); +extern int z2_init(void); +extern int swim3_init(void); +extern int swimiop_init(void); +extern int amiga_floppy_init(void); +extern int atari_floppy_init(void); +extern int ez_init(void); +extern int bpcd_init(void); +extern int ps2esdi_init(void); +extern int jsfd_init(void); +extern int viodasd_init(void); +extern int viocd_init(void); + +#if defined(CONFIG_ARCH_S390) +extern int dasd_init(void); +extern int xpram_init(void); +extern int tapeblock_init(void); +#endif /* CONFIG_ARCH_S390 */ + +#if defined(CONFIG_XEN) +extern int xlblk_init(void); +#endif /* CONFIG_XEN */ + +extern void set_device_ro(kdev_t dev,int flag); +void add_blkdev_randomness(int major); + +extern int floppy_init(void); +extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ +extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ +extern int rd_image_start; /* starting block # of image */ + +#ifdef CONFIG_BLK_DEV_INITRD + +#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ + +extern unsigned long initrd_start,initrd_end; +extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */ +void initrd_init(void); + +#endif + + +/* + * end_request() and friends. Must be called with the request queue spinlock + * acquired. All functions called within end_request() _must_be_ atomic. + * + * Several drivers define their own end_request and call + * end_that_request_first() and end_that_request_last() + * for parts of the original function. This prevents + * code duplication in drivers. 
+ */ + +static inline void blkdev_dequeue_request(struct request * req) +{ + list_del(&req->queue); +} + +int end_that_request_first(struct request *req, int uptodate, char *name); +void end_that_request_last(struct request *req); + +#if defined(MAJOR_NR) || defined(IDE_DRIVER) + +#undef DEVICE_ON +#undef DEVICE_OFF + +/* + * Add entries as needed. + */ + +#ifdef IDE_DRIVER + +#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) +#define DEVICE_NAME "ide" + +#elif (MAJOR_NR == RAMDISK_MAJOR) + +/* ram disk */ +#define DEVICE_NAME "ramdisk" +#define DEVICE_NR(device) (MINOR(device)) +#define DEVICE_NO_RANDOM + +#elif (MAJOR_NR == Z2RAM_MAJOR) + +/* Zorro II Ram */ +#define DEVICE_NAME "Z2RAM" +#define DEVICE_REQUEST do_z2_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == FLOPPY_MAJOR) + +static void floppy_off(unsigned int nr); + +#define DEVICE_NAME "floppy" +#define DEVICE_INTR do_floppy +#define DEVICE_REQUEST do_fd_request +#define DEVICE_NR(device) ( (MINOR(device) & 3) | ((MINOR(device) & 0x80 ) >> 5 )) +#define DEVICE_OFF(device) floppy_off(DEVICE_NR(device)) + +#elif (MAJOR_NR == HD_MAJOR) + +/* Hard disk: timeout is 6 seconds. */ +#define DEVICE_NAME "hard disk" +#define DEVICE_INTR do_hd +#define TIMEOUT_VALUE (6*HZ) +#define DEVICE_REQUEST do_hd_request +#define DEVICE_NR(device) (MINOR(device)>>6) + +#elif (SCSI_DISK_MAJOR(MAJOR_NR)) + +#define DEVICE_NAME "scsidisk" +#define TIMEOUT_VALUE (2*HZ) +#define DEVICE_NR(device) (((MAJOR(device) & SD_MAJOR_MASK) << (8 - 4)) + (MINOR(device) >> 4)) + +/* Kludge to use the same number for both char and block major numbers */ +#elif (MAJOR_NR == MD_MAJOR) && defined(MD_DRIVER) + +#define DEVICE_NAME "Multiple devices driver" +#define DEVICE_REQUEST do_md_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == SCSI_TAPE_MAJOR) + +#define DEVICE_NAME "scsitape" +#define DEVICE_INTR do_st +#define DEVICE_NR(device) (MINOR(device) & 0x7f) + +#elif (MAJOR_NR == OSST_MAJOR) + +#define DEVICE_NAME "onstream" +#define DEVICE_INTR do_osst +#define DEVICE_NR(device) (MINOR(device) & 0x7f) +#define DEVICE_ON(device) +#define DEVICE_OFF(device) + +#elif (MAJOR_NR == SCSI_CDROM_MAJOR) + +#define DEVICE_NAME "CD-ROM" +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == XT_DISK_MAJOR) + +#define DEVICE_NAME "xt disk" +#define DEVICE_REQUEST do_xd_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == PS2ESDI_MAJOR) + +#define DEVICE_NAME "PS/2 ESDI" +#define DEVICE_REQUEST do_ps2esdi_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == CDU31A_CDROM_MAJOR) + +#define DEVICE_NAME "CDU31A" +#define DEVICE_REQUEST do_cdu31a_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == ACSI_MAJOR) && (defined(CONFIG_ATARI_ACSI) || defined(CONFIG_ATARI_ACSI_MODULE)) + +#define DEVICE_NAME "ACSI" +#define DEVICE_INTR do_acsi +#define DEVICE_REQUEST do_acsi_request +#define DEVICE_NR(device) (MINOR(device) >> 4) + +#elif (MAJOR_NR == MITSUMI_CDROM_MAJOR) + +#define DEVICE_NAME "Mitsumi CD-ROM" +/* #define DEVICE_INTR do_mcd */ +#define DEVICE_REQUEST do_mcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MITSUMI_X_CDROM_MAJOR) + +#define DEVICE_NAME "Mitsumi CD-ROM" +/* #define DEVICE_INTR do_mcdx */ +#define DEVICE_REQUEST do_mcdx_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #1" +#define DEVICE_REQUEST do_sbpcd_request 
+#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM2_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #2" +#define DEVICE_REQUEST do_sbpcd2_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM3_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #3" +#define DEVICE_REQUEST do_sbpcd3_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MATSUSHITA_CDROM4_MAJOR) + +#define DEVICE_NAME "Matsushita CD-ROM controller #4" +#define DEVICE_REQUEST do_sbpcd4_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == AZTECH_CDROM_MAJOR) + +#define DEVICE_NAME "Aztech CD-ROM" +#define DEVICE_REQUEST do_aztcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == CDU535_CDROM_MAJOR) + +#define DEVICE_NAME "SONY-CDU535" +#define DEVICE_INTR do_cdu535 +#define DEVICE_REQUEST do_cdu535_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == GOLDSTAR_CDROM_MAJOR) + +#define DEVICE_NAME "Goldstar R420" +#define DEVICE_REQUEST do_gscd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == CM206_CDROM_MAJOR) +#define DEVICE_NAME "Philips/LMS CD-ROM cm206" +#define DEVICE_REQUEST do_cm206_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == OPTICS_CDROM_MAJOR) + +#define DEVICE_NAME "DOLPHIN 8000AT CD-ROM" +#define DEVICE_REQUEST do_optcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == SANYO_CDROM_MAJOR) + +#define DEVICE_NAME "Sanyo H94A CD-ROM" +#define DEVICE_REQUEST do_sjcd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == APBLOCK_MAJOR) + +#define DEVICE_NAME "apblock" +#define DEVICE_REQUEST ap_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == DDV_MAJOR) + +#define DEVICE_NAME "ddv" +#define DEVICE_REQUEST ddv_request +#define DEVICE_NR(device) (MINOR(device)>>PARTN_BITS) + +#elif (MAJOR_NR == MFM_ACORN_MAJOR) + +#define DEVICE_NAME "mfm disk" +#define DEVICE_INTR do_mfm +#define DEVICE_REQUEST do_mfm_request +#define DEVICE_NR(device) (MINOR(device) >> 6) + +#elif (MAJOR_NR == NBD_MAJOR) + +#define DEVICE_NAME "nbd" +#define DEVICE_REQUEST do_nbd_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == MDISK_MAJOR) + +#define DEVICE_NAME "mdisk" +#define DEVICE_REQUEST mdisk_request +#define DEVICE_NR(device) (MINOR(device)) + +#elif (MAJOR_NR == DASD_MAJOR) + +#define DEVICE_NAME "dasd" +#define DEVICE_REQUEST do_dasd_request +#define DEVICE_NR(device) (MINOR(device) >> PARTN_BITS) + +#elif (MAJOR_NR == I2O_MAJOR) + +#define DEVICE_NAME "I2O block" +#define DEVICE_REQUEST i2ob_request +#define DEVICE_NR(device) (MINOR(device)>>4) + +#elif (MAJOR_NR == COMPAQ_SMART2_MAJOR) + +#define DEVICE_NAME "ida" +#define TIMEOUT_VALUE (25*HZ) +#define DEVICE_REQUEST do_ida_request +#define DEVICE_NR(device) (MINOR(device) >> 4) + +#endif /* MAJOR_NR == whatever */ + +/* provide DEVICE_xxx defaults, if not explicitly defined + * above in the MAJOR_NR==xxx if-elif tree */ +#ifndef DEVICE_ON +#define DEVICE_ON(device) do {} while (0) +#endif +#ifndef DEVICE_OFF +#define DEVICE_OFF(device) do {} while (0) +#endif + +#if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR) +#if !defined(IDE_DRIVER) + +#ifndef CURRENT +#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif +#ifndef QUEUE_EMPTY +#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif + +#ifndef DEVICE_NAME 
+#define DEVICE_NAME "unknown" +#endif + +#define CURRENT_DEV DEVICE_NR(CURRENT->rq_dev) + +#ifdef DEVICE_INTR +static void (*DEVICE_INTR)(void) = NULL; +#endif + +#define SET_INTR(x) (DEVICE_INTR = (x)) + +#ifdef DEVICE_REQUEST +static void (DEVICE_REQUEST)(request_queue_t *); +#endif + +#ifdef DEVICE_INTR +#define CLEAR_INTR SET_INTR(NULL) +#else +#define CLEAR_INTR +#endif + +#define INIT_REQUEST \ + if (QUEUE_EMPTY) {\ + CLEAR_INTR; \ + return; \ + } \ + if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ + panic(DEVICE_NAME ": request list destroyed"); \ + if (CURRENT->bh) { \ + if (!buffer_locked(CURRENT->bh)) \ + panic(DEVICE_NAME ": block not locked"); \ + } + +#endif /* !defined(IDE_DRIVER) */ + + +#ifndef LOCAL_END_REQUEST /* If we have our own end_request, we do not want to include this mess */ + +#if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) + +static inline void end_request(int uptodate) { + struct request *req = CURRENT; + + if (end_that_request_first(req, uptodate, DEVICE_NAME)) + return; + +#ifndef DEVICE_NO_RANDOM + add_blkdev_randomness(MAJOR(req->rq_dev)); +#endif + DEVICE_OFF(req->rq_dev); + blkdev_dequeue_request(req); + end_that_request_last(req); +} + +#endif /* ! SCSI_BLK_MAJOR(MAJOR_NR) */ +#endif /* LOCAL_END_REQUEST */ + +#endif /* (MAJOR_NR != SCSI_TAPE_MAJOR) */ +#endif /* defined(MAJOR_NR) || defined(IDE_DRIVER) */ + +#endif /* _BLK_H */ diff --git a/linux-2.4.28-xen-sparse/include/linux/mm.h b/linux-2.4.28-xen-sparse/include/linux/mm.h new file mode 100644 index 0000000000..7efbd0adf0 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/linux/mm.h @@ -0,0 +1,707 @@ +#ifndef _LINUX_MM_H +#define _LINUX_MM_H + +#include <linux/sched.h> +#include <linux/errno.h> + +#ifdef __KERNEL__ + +#include <linux/config.h> +#include <linux/string.h> +#include <linux/list.h> +#include <linux/mmzone.h> +#include <linux/swap.h> +#include <linux/rbtree.h> + +extern unsigned long max_mapnr; +extern unsigned long num_physpages; +extern unsigned long num_mappedpages; +extern void * high_memory; +extern int page_cluster; +/* The inactive_clean lists are per zone. */ +extern struct list_head active_list; +extern struct list_head inactive_list; + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/atomic.h> + +/* + * Linux kernel virtual memory manager primitives. + * The idea being to have a "virtual" mm in the same way + * we have a virtual fs - giving a cleaner interface to the + * mm details, and allowing different kinds of memory mappings + * (from shared memory to executable loading to arbitrary + * mmap() functions). + */ + +/* + * This struct defines a memory VMM memory area. There is one of these + * per VM-area/task. A VM area is any part of the process virtual memory + * space that has a special rule for the page-fault handlers (ie a shared + * library, the executable area etc). + */ +struct vm_area_struct { + struct mm_struct * vm_mm; /* The address space we belong to. */ + unsigned long vm_start; /* Our start address within vm_mm. */ + unsigned long vm_end; /* The first byte after our end address + within vm_mm. */ + + /* linked list of VM areas per task, sorted by address */ + struct vm_area_struct *vm_next; + + pgprot_t vm_page_prot; /* Access permissions of this VMA. */ + unsigned long vm_flags; /* Flags, listed below. */ + + rb_node_t vm_rb; + + /* + * For areas with an address space and backing store, + * one of the address_space->i_mmap{,shared} lists, + * for shm areas, the list of attaches, otherwise unused. 
+ */ + struct vm_area_struct *vm_next_share; + struct vm_area_struct **vm_pprev_share; + + /* Function pointers to deal with this struct. */ + struct vm_operations_struct * vm_ops; + + /* Information about our backing store: */ + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + units, *not* PAGE_CACHE_SIZE */ + struct file * vm_file; /* File we map to (can be NULL). */ + unsigned long vm_raend; /* XXX: put full readahead info here. */ + void * vm_private_data; /* was vm_pte (shared mem) */ +}; + +/* + * vm_flags.. + */ +#define VM_READ 0x00000001 /* currently active flags */ +#define VM_WRITE 0x00000002 +#define VM_EXEC 0x00000004 +#define VM_SHARED 0x00000008 + +#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ +#define VM_MAYWRITE 0x00000020 +#define VM_MAYEXEC 0x00000040 +#define VM_MAYSHARE 0x00000080 + +#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#define VM_GROWSUP 0x00000200 +#define VM_SHM 0x00000400 /* shared memory area, don't swap out */ +#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ + +#define VM_EXECUTABLE 0x00001000 +#define VM_LOCKED 0x00002000 +#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ + + /* Used by sys_madvise() */ +#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ +#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ + +#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ +#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ +#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ + +#ifndef VM_STACK_FLAGS +#define VM_STACK_FLAGS 0x00000177 +#endif + +#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) +#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK +#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) +#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) +#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) + +/* read ahead limits */ +extern int vm_min_readahead; +extern int vm_max_readahead; + +/* + * mapping from the currently active vm_flags protection bits (the + * low four bits) to a page protection mask.. + */ +extern pgprot_t protection_map[16]; + + +/* + * These are the virtual MM functions - opening of an area, closing and + * unmapping it (needed to keep files on disk up-to-date etc), pointer + * to the functions called when a no-page or a wp-page exception occurs. + */ +struct vm_operations_struct { + void (*open)(struct vm_area_struct * area); + void (*close)(struct vm_area_struct * area); + struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused); +}; + +/* + * Each physical page in the system has a struct page associated with + * it to keep track of whatever it is we are using the page for at the + * moment. Note that we have no way to track which tasks are using + * a page. + * + * Try to keep the most commonly accessed fields in single cache lines + * here (16 bytes or greater). This ordering should be particularly + * beneficial on 32-bit processors. + * + * The first line is data used in page cache lookup, the second line + * is used for linear searches (eg. clock algorithm scans). + * + * TODO: make this structure smaller, it could be as small as 32 bytes. + */ +typedef struct page { + struct list_head list; /* ->mapping has some page lists. */ + struct address_space *mapping; /* The inode (or ...) we belong to. */ + unsigned long index; /* Our offset within mapping. 
*/ + struct page *next_hash; /* Next page sharing our hash bucket in + the pagecache hash table. */ + atomic_t count; /* Usage count, see below. */ + unsigned long flags; /* atomic flags, some possibly + updated asynchronously */ + struct list_head lru; /* Pageout list, eg. active_list; + protected by pagemap_lru_lock !! */ + struct page **pprev_hash; /* Complement to *next_hash. */ + struct buffer_head * buffers; /* Buffer maps us to a disk block. */ + + /* + * On machines where all RAM is mapped into kernel address space, + * we can simply calculate the virtual address. On machines with + * highmem some memory is mapped into kernel virtual memory + * dynamically, so we need a place to store that address. + * Note that this field could be 16 bits on x86 ... ;) + * + * Architectures with slow multiplication can define + * WANT_PAGE_VIRTUAL in asm/page.h + */ +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) + void *virtual; /* Kernel virtual address (NULL if + not kmapped, ie. highmem) */ +#endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */ +} mem_map_t; + +/* + * Methods to modify the page usage count. + * + * What counts for a page usage: + * - cache mapping (page->mapping) + * - disk mapping (page->buffers) + * - page mapped in a task's page tables, each mapping + * is counted separately + * + * Also, many kernel routines increase the page count before a critical + * routine so they can be sure the page doesn't go away from under them. + */ +#define get_page(p) atomic_inc(&(p)->count) +#define put_page(p) __free_page(p) +#define put_page_testzero(p) atomic_dec_and_test(&(p)->count) +#define page_count(p) atomic_read(&(p)->count) +#define set_page_count(p,v) atomic_set(&(p)->count, v) + +/* + * Various page->flags bits: + * + * PG_reserved is set for special pages, which can never be swapped + * out. Some of them might not even exist (eg empty_bad_page)... + * + * Multiple processes may "see" the same page. E.g. for untouched + * mappings of /dev/null, all processes see the same page full of + * zeroes, and text pages of executables and shared libraries have + * only one copy in memory, at most, normally. + * + * For the non-reserved pages, page->count denotes a reference count. + * page->count == 0 means the page is free. + * page->count == 1 means the page is used for exactly one purpose + * (e.g. a private data page of one process). + * + * A page may be used for kmalloc() or anyone else who does a + * __get_free_page(). In this case the page->count is at least 1, and + * all other fields are unused but should be 0 or NULL. The + * management of this page is the responsibility of the one who uses + * it. + * + * The other pages (we may call them "process pages") are completely + * managed by the Linux memory manager: I/O, buffers, swapping etc. + * The following discussion applies only to them. + * + * A page may belong to an inode's memory mapping. In this case, + * page->mapping is the pointer to the inode, and page->index is the + * file offset of the page, in units of PAGE_CACHE_SIZE. + * + * A page may have buffers allocated to it. In this case, + * page->buffers is a circular list of these buffer heads. Else, + * page->buffers == NULL. + * + * For pages belonging to inodes, the page->count is the number of + * attaches, plus 1 if buffers are allocated to the page, plus one + * for the page cache itself. 
+ * + * All pages belonging to an inode are in these doubly linked lists: + * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages; + * using the page->list list_head. These fields are also used for + * freelist managemet (when page->count==0). + * + * There is also a hash table mapping (mapping,index) to the page + * in memory if present. The lists for this hash table use the fields + * page->next_hash and page->pprev_hash. + * + * All process pages can do I/O: + * - inode pages may need to be read from disk, + * - inode pages which have been modified and are MAP_SHARED may need + * to be written to disk, + * - private pages which have been modified may need to be swapped out + * to swap space and (later) to be read back into memory. + * During disk I/O, PG_locked is used. This bit is set before I/O + * and reset when I/O completes. page_waitqueue(page) is a wait queue of all + * tasks waiting for the I/O on this page to complete. + * PG_uptodate tells whether the page's contents is valid. + * When a read completes, the page becomes uptodate, unless a disk I/O + * error happened. + * + * For choosing which pages to swap out, inode pages carry a + * PG_referenced bit, which is set any time the system accesses + * that page through the (mapping,index) hash table. This referenced + * bit, together with the referenced bit in the page tables, is used + * to manipulate page->age and move the page across the active, + * inactive_dirty and inactive_clean lists. + * + * Note that the referenced bit, the page->lru list_head and the + * active, inactive_dirty and inactive_clean lists are protected by + * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit! + * + * PG_skip is used on sparc/sparc64 architectures to "skip" certain + * parts of the address space. + * + * PG_error is set to indicate that an I/O error occurred on this page. + * + * PG_arch_1 is an architecture specific page state bit. The generic + * code guarantees that this bit is cleared for a page when it first + * is entered into the page cache. + * + * PG_highmem pages are not permanently mapped into the kernel virtual + * address space, they need to be kmapped separately for doing IO on + * the pages. The struct page (these bits with information) are always + * mapped into kernel address space... + */ +#define PG_locked 0 /* Page is locked. Don't touch. */ +#define PG_error 1 +#define PG_referenced 2 +#define PG_uptodate 3 +#define PG_dirty 4 +#define PG_unused 5 +#define PG_lru 6 +#define PG_active 7 +#define PG_slab 8 +#define PG_skip 10 +#define PG_highmem 11 +#define PG_checked 12 /* kill me in 2.5.<early>. */ +#define PG_arch_1 13 +#define PG_reserved 14 +#define PG_launder 15 /* written out by VM pressure.. */ +#define PG_fs_1 16 /* Filesystem specific */ +#define PG_foreign 21 /* Page belongs to foreign allocator */ + +#ifndef arch_set_page_uptodate +#define arch_set_page_uptodate(page) +#endif + +/* Make it prettier to test the above... 
*/ +#define UnlockPage(page) unlock_page(page) +#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) +#ifndef SetPageUptodate +#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) +#endif +#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) +#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) +#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) +#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) +#define PageLocked(page) test_bit(PG_locked, &(page)->flags) +#define LockPage(page) set_bit(PG_locked, &(page)->flags) +#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) +#define PageChecked(page) test_bit(PG_checked, &(page)->flags) +#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) +#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) +#define PageLaunder(page) test_bit(PG_launder, &(page)->flags) +#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) +#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) +#define ClearPageArch1(page) clear_bit(PG_arch_1, &(page)->flags) + +/* A foreign page uses a custom destructor rather than the buddy allocator. */ +#ifdef CONFIG_FOREIGN_PAGES +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) +#define SetPageForeign(page, dtor) do { \ + set_bit(PG_foreign, &(page)->flags); \ + (page)->mapping = (void *)dtor; \ +} while (0) +#define ClearPageForeign(page) do { \ + clear_bit(PG_foreign, &(page)->flags); \ + (page)->mapping = NULL; \ +} while (0) +#define PageForeignDestructor(page) \ + ( (void (*) (struct page *)) (page)->mapping ) +#else +#define PageForeign(page) 0 +#define PageForeignDestructor(page) void +#endif + +/* + * The zone field is never updated after free_area_init_core() + * sets it, so none of the operations on it need to be atomic. + */ +#define NODE_SHIFT 4 +#define ZONE_SHIFT (BITS_PER_LONG - 8) + +struct zone_struct; +extern struct zone_struct *zone_table[]; + +static inline zone_t *page_zone(struct page *page) +{ + return zone_table[page->flags >> ZONE_SHIFT]; +} + +static inline void set_page_zone(struct page *page, unsigned long zone_num) +{ + page->flags &= ~(~0UL << ZONE_SHIFT); + page->flags |= zone_num << ZONE_SHIFT; +} + +/* + * In order to avoid #ifdefs within C code itself, we define + * set_page_address to a noop for non-highmem machines, where + * the field isn't useful. + * The same is true for page_address() in arch-dependent code. + */ +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) + +#define set_page_address(page, address) \ + do { \ + (page)->virtual = (address); \ + } while(0) + +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ +#define set_page_address(page, address) do { } while(0) +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ + +/* + * Permanent address of a page. Obviously must never be + * called on a highmem page. 
+ */ +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) + +#define page_address(page) ((page)->virtual) + +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ + +#define page_address(page) \ + __va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT) \ + + page_zone(page)->zone_start_paddr) + +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ + +extern void FASTCALL(set_page_dirty(struct page *)); + +/* + * The first mb is necessary to safely close the critical section opened by the + * TryLockPage(), the second mb is necessary to enforce ordering between + * the clear_bit and the read of the waitqueue (to avoid SMP races with a + * parallel wait_on_page). + */ +#define PageError(page) test_bit(PG_error, &(page)->flags) +#define SetPageError(page) set_bit(PG_error, &(page)->flags) +#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) +#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) +#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) +#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) +#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) +#define PageSlab(page) test_bit(PG_slab, &(page)->flags) +#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) +#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) +#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) + +#define PageActive(page) test_bit(PG_active, &(page)->flags) +#define SetPageActive(page) set_bit(PG_active, &(page)->flags) +#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) + +#define PageLRU(page) test_bit(PG_lru, &(page)->flags) +#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) +#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) + +#ifdef CONFIG_HIGHMEM +#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) +#else +#define PageHighMem(page) 0 /* needed to optimize away at compile time */ +#endif + +#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) +#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) + +/* + * Error return values for the *_nopage functions + */ +#define NOPAGE_SIGBUS (NULL) +#define NOPAGE_OOM ((struct page *) (-1)) + +/* The array of struct pages */ +extern mem_map_t * mem_map; + +/* + * There is only one page-allocator function, and two main namespaces to + * it. The alloc_page*() variants return 'struct page *' and as such + * can allocate highmem pages, the *get*page*() variants return + * virtual kernel addresses to the allocated page(s). + */ +extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); +extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); +extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); + +static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) +{ + /* + * Gets optimized away by the compiler. 
+ */ + if (order >= MAX_ORDER) + return NULL; + return _alloc_pages(gfp_mask, order); +} + +#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) + +extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); + +#define __get_free_page(gfp_mask) \ + __get_free_pages((gfp_mask),0) + +#define __get_dma_pages(gfp_mask, order) \ + __get_free_pages((gfp_mask) | GFP_DMA,(order)) + +/* + * The old interface name will be removed in 2.5: + */ +#define get_free_page get_zeroed_page + +/* + * There is only one 'core' page-freeing function. + */ +extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); +extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); + +#define __free_page(page) __free_pages((page), 0) +#define free_page(addr) free_pages((addr),0) + +extern void show_free_areas(void); +extern void show_free_areas_node(pg_data_t *pgdat); + +extern void clear_page_tables(struct mm_struct *, unsigned long, int); + +extern int fail_writepage(struct page *); +struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); +struct file *shmem_file_setup(char * name, loff_t size); +extern void shmem_lock(struct file * file, int lock); +extern int shmem_zero_setup(struct vm_area_struct *); + +extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); +extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); +extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); +extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); + +extern int vmtruncate(struct inode * inode, loff_t offset); +extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); +extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); +extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); +extern int make_pages_present(unsigned long addr, unsigned long end); +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len); +extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len); +extern int ptrace_attach(struct task_struct *tsk); +extern int ptrace_detach(struct task_struct *, unsigned int); +extern void ptrace_disable(struct task_struct *); +extern int ptrace_check_attach(struct task_struct *task, int kill); + +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, + int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); + +/* + * On a two-level page table, this ends up being trivial. Thus the + * inlining and the symmetry break with pte_alloc() that does all + * of this out-of-line. 
+ */ +static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + if (pgd_none(*pgd)) + return __pmd_alloc(mm, pgd, address); + return pmd_offset(pgd, address); +} + +extern int pgt_cache_water[2]; +extern int check_pgt_cache(void); + +extern void free_area_init(unsigned long * zones_size); +extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, + unsigned long * zones_size, unsigned long zone_start_paddr, + unsigned long *zholes_size); +extern void mem_init(void); +extern void show_mem(void); +extern void si_meminfo(struct sysinfo * val); +extern void swapin_readahead(swp_entry_t); + +extern struct address_space swapper_space; +#define PageSwapCache(page) ((page)->mapping == &swapper_space) + +static inline int is_page_cache_freeable(struct page * page) +{ + return page_count(page) - !!page->buffers == 1; +} + +extern int FASTCALL(can_share_swap_page(struct page *)); +extern int FASTCALL(remove_exclusive_swap_page(struct page *)); + +extern void __free_pte(pte_t); + +/* mmap.c */ +extern void lock_vma_mappings(struct vm_area_struct *); +extern void unlock_vma_mappings(struct vm_area_struct *); +extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); +extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *); +extern void build_mmap_rb(struct mm_struct *); +extern void exit_mmap(struct mm_struct *); + +extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long pgoff); + +static inline unsigned long do_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + unsigned long ret = -EINVAL; + if ((offset + PAGE_ALIGN(len)) < offset) + goto out; + if (!(offset & ~PAGE_MASK)) + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +out: + return ret; +} + +extern int do_munmap(struct mm_struct *, unsigned long, size_t); + +extern unsigned long do_brk(unsigned long, unsigned long); + +static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev) +{ + prev->vm_next = vma->vm_next; + rb_erase(&vma->vm_rb, &mm->mm_rb); + if (mm->mmap_cache == vma) + mm->mmap_cache = prev; +} + +static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags) +{ + if (!vma->vm_file && vma->vm_flags == vm_flags) + return 1; + else + return 0; +} + +struct zone_t; +/* filemap.c */ +extern void remove_inode_page(struct page *); +extern unsigned long page_unuse(struct page *); +extern void truncate_inode_pages(struct address_space *, loff_t); + +/* generic vm_area_ops exported for stackable file systems */ +extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned int); +extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int); + +/* + * GFP bitmasks.. + */ +/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */ +#define __GFP_DMA 0x01 +#define __GFP_HIGHMEM 0x02 + +/* Action modifiers - doesn't change the zoning */ +#define __GFP_WAIT 0x10 /* Can wait and reschedule? */ +#define __GFP_HIGH 0x20 /* Should access emergency pools? */ +#define __GFP_IO 0x40 /* Can start low memory physical IO? */ +#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? 
*/ +#define __GFP_FS 0x100 /* Can call down to low-level FS? */ + +#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) +#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) +#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO) +#define GFP_ATOMIC (__GFP_HIGH) +#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) +#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM) +#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) +#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) +#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) + +/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some + platforms, used as appropriate on others */ + +#define GFP_DMA __GFP_DMA + +static inline unsigned int pf_gfp_mask(unsigned int gfp_mask) +{ + /* avoid all memory balancing I/O methods if this task cannot block on I/O */ + if (current->flags & PF_NOIO) + gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS); + + return gfp_mask; +} + +/* vma is the first one with address < vma->vm_end, + * and even address < vma->vm_start. Have to extend vma. */ +static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) +{ + unsigned long grow; + + /* + * vma->vm_start/vm_end cannot change under us because the caller is required + * to hold the mmap_sem in write mode. We need to get the spinlock only + * before relocating the vma range ourself. + */ + address &= PAGE_MASK; + spin_lock(&vma->vm_mm->page_table_lock); + grow = (vma->vm_start - address) >> PAGE_SHIFT; + if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || + ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { + spin_unlock(&vma->vm_mm->page_table_lock); + return -ENOMEM; + } + vma->vm_start = address; + vma->vm_pgoff -= grow; + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) + vma->vm_mm->locked_vm += grow; + spin_unlock(&vma->vm_mm->page_table_lock); + return 0; +} + +/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ +extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); +extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, + struct vm_area_struct **pprev); + +/* Look up the first VMA which intersects the interval start_addr..end_addr-1, + NULL if none. Assume start_addr < end_addr. 
*/ +static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) +{ + struct vm_area_struct * vma = find_vma(mm,start_addr); + + if (vma && end_addr <= vma->vm_start) + vma = NULL; + return vma; +} + +extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); + +extern struct page * vmalloc_to_page(void *addr); + +#endif /* __KERNEL__ */ + +#endif diff --git a/linux-2.4.28-xen-sparse/include/linux/sched.h b/linux-2.4.28-xen-sparse/include/linux/sched.h new file mode 100644 index 0000000000..da2f49dfcf --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/linux/sched.h @@ -0,0 +1,968 @@ +#ifndef _LINUX_SCHED_H +#define _LINUX_SCHED_H + +#include <asm/param.h> /* for HZ */ + +extern unsigned long event; + +#include <linux/config.h> +#include <linux/binfmts.h> +#include <linux/threads.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/times.h> +#include <linux/timex.h> +#include <linux/rbtree.h> + +#include <asm/system.h> +#include <asm/semaphore.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/mmu.h> + +#include <linux/smp.h> +#include <linux/tty.h> +#include <linux/sem.h> +#include <linux/signal.h> +#include <linux/securebits.h> +#include <linux/fs_struct.h> + +struct exec_domain; + +/* + * cloning flags: + */ +#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ +#define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ +#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ +#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PID 0x00001000 /* set if pid shared */ +#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ +#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ +#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ +#define CLONE_THREAD 0x00010000 /* Same thread group? */ +#define CLONE_NEWNS 0x00020000 /* New namespace group? */ + +#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) + +/* + * These are the constant used to fake the fixed-point load-average + * counting. Some notes: + * - 11 bit fractions expand to 22 bits by the multiplies: this gives + * a load-average precision of 10 bits integer + 11 bits fractional + * - if you want to count load-averages more often, you need more + * precision, or rounding will get you. With 2-second counting freq, + * the EXP_n values would be 1981, 2034 and 2043 if still using only + * 11 bit fractions. 
+ */ +extern unsigned long avenrun[]; /* Load averages */ + +#define FSHIFT 11 /* nr of bits of precision */ +#define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ +#define LOAD_FREQ (5*HZ) /* 5 sec intervals */ +#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */ +#define EXP_5 2014 /* 1/exp(5sec/5min) */ +#define EXP_15 2037 /* 1/exp(5sec/15min) */ + +#define CALC_LOAD(load,exp,n) \ + load *= exp; \ + load += n*(FIXED_1-exp); \ + load >>= FSHIFT; + +#define CT_TO_SECS(x) ((x) / HZ) +#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ) + +extern int nr_running, nr_threads; +extern int last_pid; + +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/param.h> +#include <linux/resource.h> +#ifdef __KERNEL__ +#include <linux/timer.h> +#endif + +#include <asm/processor.h> + +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define TASK_ZOMBIE 4 +#define TASK_STOPPED 8 + +#define __set_task_state(tsk, state_value) \ + do { (tsk)->state = (state_value); } while (0) +#define set_task_state(tsk, state_value) \ + set_mb((tsk)->state, (state_value)) + +#define __set_current_state(state_value) \ + do { current->state = (state_value); } while (0) +#define set_current_state(state_value) \ + set_mb(current->state, (state_value)) + +/* + * Scheduling policies + */ +#define SCHED_OTHER 0 +#define SCHED_FIFO 1 +#define SCHED_RR 2 + +/* + * This is an additional bit set when we want to + * yield the CPU for one re-schedule.. + */ +#define SCHED_YIELD 0x10 + +struct sched_param { + int sched_priority; +}; + +struct completion; + +#ifdef __KERNEL__ + +#include <linux/spinlock.h> + +/* + * This serializes "schedule()" and also protects + * the run-queue from deletions/modifications (but + * _adding_ to the beginning of the run-queue has + * a separate lock). + */ +extern rwlock_t tasklist_lock; +extern spinlock_t runqueue_lock; +extern spinlock_t mmlist_lock; + +extern void sched_init(void); +extern void init_idle(void); +extern void show_state(void); +extern void cpu_init (void); +extern void trap_init(void); +extern void update_process_times(int user); +#ifdef CONFIG_NO_IDLE_HZ +extern void update_process_times_us(int user, int system); +#endif +extern void update_one_process(struct task_struct *p, unsigned long user, + unsigned long system, int cpu); + +#define MAX_SCHEDULE_TIMEOUT LONG_MAX +extern signed long FASTCALL(schedule_timeout(signed long timeout)); +asmlinkage void schedule(void); + +extern int schedule_task(struct tq_struct *task); +extern void flush_scheduled_tasks(void); +extern int start_context_thread(void); +extern int current_is_keventd(void); + +#if CONFIG_SMP +extern void set_cpus_allowed(struct task_struct *p, unsigned long new_mask); +#else +# define set_cpus_allowed(p, new_mask) do { } while (0) +#endif + +/* + * The default fd array needs to be at least BITS_PER_LONG, + * as this is the granularity returned by copy_fdset(). + */ +#define NR_OPEN_DEFAULT BITS_PER_LONG + +struct namespace; +/* + * Open file table structure + */ +struct files_struct { + atomic_t count; + rwlock_t file_lock; /* Protects all the below members. 
Nests inside tsk->alloc_lock */ + int max_fds; + int max_fdset; + int next_fd; + struct file ** fd; /* current fd array */ + fd_set *close_on_exec; + fd_set *open_fds; + fd_set close_on_exec_init; + fd_set open_fds_init; + struct file * fd_array[NR_OPEN_DEFAULT]; +}; + +#define INIT_FILES \ +{ \ + count: ATOMIC_INIT(1), \ + file_lock: RW_LOCK_UNLOCKED, \ + max_fds: NR_OPEN_DEFAULT, \ + max_fdset: __FD_SETSIZE, \ + next_fd: 0, \ + fd: &init_files.fd_array[0], \ + close_on_exec: &init_files.close_on_exec_init, \ + open_fds: &init_files.open_fds_init, \ + close_on_exec_init: { { 0, } }, \ + open_fds_init: { { 0, } }, \ + fd_array: { NULL, } \ +} + +/* Maximum number of active map areas.. This is a random (large) number */ +#define DEFAULT_MAX_MAP_COUNT (65536) + +extern int max_map_count; + +struct mm_struct { + struct vm_area_struct * mmap; /* list of VMAs */ + rb_root_t mm_rb; + struct vm_area_struct * mmap_cache; /* last find_vma result */ + pgd_t * pgd; + atomic_t mm_users; /* How many users with user space? */ + atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ + int map_count; /* number of VMAs */ + struct rw_semaphore mmap_sem; + spinlock_t page_table_lock; /* Protects task page tables and mm->rss */ + + struct list_head mmlist; /* List of all active mm's. These are globally strung + * together off init_mm.mmlist, and are protected + * by mmlist_lock + */ + + unsigned long start_code, end_code, start_data, end_data; + unsigned long start_brk, brk, start_stack; + unsigned long arg_start, arg_end, env_start, env_end; + unsigned long rss, total_vm, locked_vm; + unsigned long def_flags; + unsigned long cpu_vm_mask; + unsigned long swap_address; + + unsigned dumpable:1; + + /* Architecture-specific MM context */ + mm_context_t context; +}; + +extern int mmlist_nr; + +#define INIT_MM(name) \ +{ \ + mm_rb: RB_ROOT, \ + pgd: swapper_pg_dir, \ + mm_users: ATOMIC_INIT(2), \ + mm_count: ATOMIC_INIT(1), \ + mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ + page_table_lock: SPIN_LOCK_UNLOCKED, \ + mmlist: LIST_HEAD_INIT(name.mmlist), \ +} + +struct signal_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; +}; + + +#define INIT_SIGNALS { \ + count: ATOMIC_INIT(1), \ + action: { {{0,}}, }, \ + siglock: SPIN_LOCK_UNLOCKED \ +} + +/* + * Some day this will be a full-fledged user tracking system.. + */ +struct user_struct { + atomic_t __count; /* reference count */ + atomic_t processes; /* How many processes does this user have? */ + atomic_t files; /* How many open files does this user have? */ + + /* Hash table maintenance information */ + struct user_struct *next, **pprev; + uid_t uid; +}; + +#define get_current_user() ({ \ + struct user_struct *__tmp_user = current->user; \ + atomic_inc(&__tmp_user->__count); \ + __tmp_user; }) + +extern struct user_struct root_user; +#define INIT_USER (&root_user) + +struct task_struct { + /* + * offsets of these are hardcoded elsewhere - touch with care + */ + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ + unsigned long flags; /* per process flags, defined below */ + int sigpending; + mm_segment_t addr_limit; /* thread address space: + 0-0xBFFFFFFF for user-thead + 0-0xFFFFFFFF for kernel-thread + */ + struct exec_domain *exec_domain; + volatile long need_resched; + unsigned long ptrace; + + int lock_depth; /* Lock depth */ + +/* + * offset 32 begins here on 32-bit platforms. 
We keep + * all fields in a single cacheline that are needed for + * the goodness() loop in schedule(). + */ + long counter; + long nice; + unsigned long policy; + struct mm_struct *mm; + int processor; + /* + * cpus_runnable is ~0 if the process is not running on any + * CPU. It's (1 << cpu) if it's running on a CPU. This mask + * is updated under the runqueue lock. + * + * To determine whether a process might run on a CPU, this + * mask is AND-ed with cpus_allowed. + */ + unsigned long cpus_runnable, cpus_allowed; + /* + * (only the 'next' pointer fits into the cacheline, but + * that's just fine.) + */ + struct list_head run_list; + unsigned long sleep_time; + + struct task_struct *next_task, *prev_task; + struct mm_struct *active_mm; + struct list_head local_pages; + unsigned int allocation_order, nr_local_pages; + +/* task state */ + struct linux_binfmt *binfmt; + int exit_code, exit_signal; + int pdeath_signal; /* The signal sent when the parent dies */ + /* ??? */ + unsigned long personality; + int did_exec:1; + unsigned task_dumpable:1; + pid_t pid; + pid_t pgrp; + pid_t tty_old_pgrp; + pid_t session; + pid_t tgid; + /* boolean value for session group leader */ + int leader; + /* + * pointers to (original) parent process, youngest child, younger sibling, + * older sibling, respectively. (p->father can be replaced with + * p->p_pptr->pid) + */ + struct task_struct *p_opptr, *p_pptr, *p_cptr, *p_ysptr, *p_osptr; + struct list_head thread_group; + + /* PID hash table linkage. */ + struct task_struct *pidhash_next; + struct task_struct **pidhash_pprev; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + struct completion *vfork_done; /* for vfork() */ + unsigned long rt_priority; + unsigned long it_real_value, it_prof_value, it_virt_value; + unsigned long it_real_incr, it_prof_incr, it_virt_incr; + struct timer_list real_timer; + struct tms times; + unsigned long start_time; + long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; +/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ + unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; + int swappable:1; +/* process credentials */ + uid_t uid,euid,suid,fsuid; + gid_t gid,egid,sgid,fsgid; + int ngroups; + gid_t groups[NGROUPS]; + kernel_cap_t cap_effective, cap_inheritable, cap_permitted; + int keep_capabilities:1; + struct user_struct *user; +/* limits */ + struct rlimit rlim[RLIM_NLIMITS]; + unsigned short used_math; + char comm[16]; +/* file system info */ + int link_count, total_link_count; + struct tty_struct *tty; /* NULL if no tty */ + unsigned int locks; /* How many file locks are being held */ +/* ipc stuff */ + struct sem_undo *semundo; + struct sem_queue *semsleeping; +/* CPU-specific state of this task */ + struct thread_struct thread; +/* filesystem information */ + struct fs_struct *fs; +/* open file information */ + struct files_struct *files; +/* namespace */ + struct namespace *namespace; +/* signal handlers */ + spinlock_t sigmask_lock; /* Protects signal and blocked */ + struct signal_struct *sig; + + sigset_t blocked; + struct sigpending pending; + + unsigned long sas_ss_sp; + size_t sas_ss_size; + int (*notifier)(void *priv); + void *notifier_data; + sigset_t *notifier_mask; + +/* Thread group tracking */ + u32 parent_exec_id; + u32 self_exec_id; +/* Protection of (de-)allocation: mm, files, fs, tty */ + spinlock_t alloc_lock; + +/* journalling filesystem info */ + void *journal_info; +}; + +/* + * Per process flags + */ +#define PF_ALIGNWARN 0x00000001 
/* Print alignment warning msgs */ + /* Not implemented yet, only for 486*/ +#define PF_STARTING 0x00000002 /* being created */ +#define PF_EXITING 0x00000004 /* getting shut down */ +#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* dumped core */ +#define PF_SIGNALED 0x00000400 /* killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */ +#define PF_FREE_PAGES 0x00002000 /* per process page freeing */ +#define PF_NOIO 0x00004000 /* avoid generating further I/O */ +#define PF_FSTRANS 0x00008000 /* inside a filesystem transaction */ + +#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ + +/* + * Ptrace flags + */ + +#define PT_PTRACED 0x00000001 +#define PT_TRACESYS 0x00000002 +#define PT_DTRACE 0x00000004 /* delayed trace (used on m68k, i386) */ +#define PT_TRACESYSGOOD 0x00000008 +#define PT_PTRACE_CAP 0x00000010 /* ptracer can follow suid-exec */ + +#define is_dumpable(tsk) ((tsk)->task_dumpable && (tsk)->mm && (tsk)->mm->dumpable) + +/* + * Limit the stack by to some sane default: root can always + * increase this limit if needed.. 8MB seems reasonable. + */ +#define _STK_LIM (8*1024*1024) + +#define DEF_COUNTER (10*HZ/100) /* 100 ms time slice */ +#define MAX_COUNTER (20*HZ/100) +#define DEF_NICE (0) + +extern void yield(void); + +/* + * The default (Linux) execution domain. + */ +extern struct exec_domain default_exec_domain; + +/* + * INIT_TASK is used to set up the first task table, touch at + * your own risk!. Base=0, limit=0x1fffff (=2MB) + */ +#define INIT_TASK(tsk) \ +{ \ + state: 0, \ + flags: 0, \ + sigpending: 0, \ + addr_limit: KERNEL_DS, \ + exec_domain: &default_exec_domain, \ + lock_depth: -1, \ + counter: DEF_COUNTER, \ + nice: DEF_NICE, \ + policy: SCHED_OTHER, \ + mm: NULL, \ + active_mm: &init_mm, \ + cpus_runnable: ~0UL, \ + cpus_allowed: ~0UL, \ + run_list: LIST_HEAD_INIT(tsk.run_list), \ + next_task: &tsk, \ + prev_task: &tsk, \ + p_opptr: &tsk, \ + p_pptr: &tsk, \ + thread_group: LIST_HEAD_INIT(tsk.thread_group), \ + wait_chldexit: __WAIT_QUEUE_HEAD_INITIALIZER(tsk.wait_chldexit),\ + real_timer: { \ + function: it_real_fn \ + }, \ + cap_effective: CAP_INIT_EFF_SET, \ + cap_inheritable: CAP_INIT_INH_SET, \ + cap_permitted: CAP_FULL_SET, \ + keep_capabilities: 0, \ + rlim: INIT_RLIMITS, \ + user: INIT_USER, \ + comm: "swapper", \ + thread: INIT_THREAD, \ + fs: &init_fs, \ + files: &init_files, \ + sigmask_lock: SPIN_LOCK_UNLOCKED, \ + sig: &init_signals, \ + pending: { NULL, &tsk.pending.head, {{0}}}, \ + blocked: {{0}}, \ + alloc_lock: SPIN_LOCK_UNLOCKED, \ + journal_info: NULL, \ +} + + +#ifndef INIT_TASK_SIZE +# define INIT_TASK_SIZE 2048*sizeof(long) +#endif + +union task_union { + struct task_struct task; + unsigned long stack[INIT_TASK_SIZE/sizeof(long)]; +}; + +extern union task_union init_task_union; + +extern struct mm_struct init_mm; +extern struct task_struct *init_tasks[NR_CPUS]; + +/* PID hashing. (shouldnt this be dynamic?) 
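+ *
+ * Usage sketch (editorial addition, not part of the original header):
+ * lookups are done under the tasklist_lock read lock, e.g.
+ *
+ *	read_lock(&tasklist_lock);
+ *	p = find_task_by_pid(pid);
+ *	if (p)
+ *		send_sig(SIGTERM, p, 1);	<- any use of "p" goes here
+ *	read_unlock(&tasklist_lock);
+ *
+ * ("pid" and the SIGTERM example are illustrative.)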
*/ +#define PIDHASH_SZ (4096 >> 2) +extern struct task_struct *pidhash[PIDHASH_SZ]; + +#define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) + +static inline void hash_pid(struct task_struct *p) +{ + struct task_struct **htable = &pidhash[pid_hashfn(p->pid)]; + + if((p->pidhash_next = *htable) != NULL) + (*htable)->pidhash_pprev = &p->pidhash_next; + *htable = p; + p->pidhash_pprev = htable; +} + +static inline void unhash_pid(struct task_struct *p) +{ + if(p->pidhash_next) + p->pidhash_next->pidhash_pprev = p->pidhash_pprev; + *p->pidhash_pprev = p->pidhash_next; +} + +static inline struct task_struct *find_task_by_pid(int pid) +{ + struct task_struct *p, **htable = &pidhash[pid_hashfn(pid)]; + + for(p = *htable; p && p->pid != pid; p = p->pidhash_next) + ; + + return p; +} + +#define task_has_cpu(tsk) ((tsk)->cpus_runnable != ~0UL) + +static inline void task_set_cpu(struct task_struct *tsk, unsigned int cpu) +{ + tsk->processor = cpu; + tsk->cpus_runnable = 1UL << cpu; +} + +static inline void task_release_cpu(struct task_struct *tsk) +{ + tsk->cpus_runnable = ~0UL; +} + +/* per-UID process charging. */ +extern struct user_struct * alloc_uid(uid_t); +extern void free_uid(struct user_struct *); +extern void switch_uid(struct user_struct *); + +#include <asm/current.h> + +extern unsigned long volatile jiffies; +extern unsigned long itimer_ticks; +extern unsigned long itimer_next; +extern struct timeval xtime; +extern void do_timer(struct pt_regs *); +#ifdef CONFIG_NO_IDLE_HZ +extern void do_timer_ticks(int ticks); +#endif + +extern unsigned int * prof_buffer; +extern unsigned long prof_len; +extern unsigned long prof_shift; + +#define CURRENT_TIME (xtime.tv_sec) + +extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr)); +extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); +extern void FASTCALL(sleep_on(wait_queue_head_t *q)); +extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, + signed long timeout)); +extern void FASTCALL(interruptible_sleep_on(wait_queue_head_t *q)); +extern long FASTCALL(interruptible_sleep_on_timeout(wait_queue_head_t *q, + signed long timeout)); +extern int FASTCALL(wake_up_process(struct task_struct * tsk)); + +#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) +#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) +#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) +#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) +#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) +#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) +#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) +#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr) +asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); + +extern int in_group_p(gid_t); +extern int in_egroup_p(gid_t); + +extern void proc_caches_init(void); +extern void flush_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *); +extern void sig_exit(int, int, struct siginfo *); +extern int dequeue_signal(sigset_t *, siginfo_t *); +extern void block_all_signals(int (*notifier)(void 
*priv), void *priv, + sigset_t *mask); +extern void unblock_all_signals(void); +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int kill_pg_info(int, struct siginfo *, pid_t); +extern int kill_sl_info(int, struct siginfo *, pid_t); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern void notify_parent(struct task_struct *, int); +extern void do_notify_parent(struct task_struct *, int); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int kill_pg(pid_t, int, int); +extern int kill_sl(pid_t, int, int); +extern int kill_proc(pid_t, int, int); +extern int do_sigaction(int, const struct k_sigaction *, struct k_sigaction *); +extern int do_sigaltstack(const stack_t *, stack_t *, unsigned long); + +static inline int signal_pending(struct task_struct *p) +{ + return (p->sigpending != 0); +} + +/* + * Re-calculate pending state from the set of locally pending + * signals, globally pending signals, and blocked signals. + */ +static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) +{ + unsigned long ready; + long i; + + switch (_NSIG_WORDS) { + default: + for (i = _NSIG_WORDS, ready = 0; --i >= 0 ;) + ready |= signal->sig[i] &~ blocked->sig[i]; + break; + + case 4: ready = signal->sig[3] &~ blocked->sig[3]; + ready |= signal->sig[2] &~ blocked->sig[2]; + ready |= signal->sig[1] &~ blocked->sig[1]; + ready |= signal->sig[0] &~ blocked->sig[0]; + break; + + case 2: ready = signal->sig[1] &~ blocked->sig[1]; + ready |= signal->sig[0] &~ blocked->sig[0]; + break; + + case 1: ready = signal->sig[0] &~ blocked->sig[0]; + } + return ready != 0; +} + +/* Reevaluate whether the task has signals pending delivery. + This is required every time the blocked sigset_t changes. + All callers should have t->sigmask_lock. */ + +static inline void recalc_sigpending(struct task_struct *t) +{ + t->sigpending = has_pending_signals(&t->pending.signal, &t->blocked); +} + +/* True if we are on the alternate signal stack. */ + +static inline int on_sig_stack(unsigned long sp) +{ + return (sp - current->sas_ss_sp < current->sas_ss_size); +} + +static inline int sas_ss_flags(unsigned long sp) +{ + return (current->sas_ss_size == 0 ? SS_DISABLE + : on_sig_stack(sp) ? SS_ONSTACK : 0); +} + +extern int request_irq(unsigned int, + void (*handler)(int, void *, struct pt_regs *), + unsigned long, const char *, void *); +extern void free_irq(unsigned int, void *); + +/* + * This has now become a routine instead of a macro, it sets a flag if + * it returns true (to do BSD-style accounting where the process is flagged + * if it uses root privs). The implication of this is that you should do + * normal permissions checks first, and check suser() last. + * + * [Dec 1997 -- Chris Evans] + * For correctness, the above considerations need to be extended to + * fsuser(). This is done, along with moving fsuser() checks to be + * last. + * + * These will be removed, but in the mean time, when the SECURE_NOROOT + * flag is set, uids don't grant privilege. + */ +static inline int suser(void) +{ + if (!issecure(SECURE_NOROOT) && current->euid == 0) { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} + +static inline int fsuser(void) +{ + if (!issecure(SECURE_NOROOT) && current->fsuid == 0) { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} + +/* + * capable() checks for a particular capability. 
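+ * For example (editorial sketch, not part of the original comment), a
+ * privileged operation is normally gated with:
+ *
+ *	if (!capable(CAP_NET_ADMIN))
+ *		return -EPERM;
+ *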
+ * New privilege checks should use this interface, rather than suser() or + * fsuser(). See include/linux/capability.h for defined capabilities. + */ + +static inline int capable(int cap) +{ +#if 1 /* ok now */ + if (cap_raised(current->cap_effective, cap)) +#else + if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0) +#endif + { + current->flags |= PF_SUPERPRIV; + return 1; + } + return 0; +} + +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +extern struct mm_struct * start_lazy_tlb(void); +extern void end_lazy_tlb(struct mm_struct *mm); + +/* mmdrop drops the mm and the page tables */ +extern void FASTCALL(__mmdrop(struct mm_struct *)); +static inline void mmdrop(struct mm_struct * mm) +{ + if (atomic_dec_and_test(&mm->mm_count)) + __mmdrop(mm); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(void); + +/* + * Routines for handling the fd arrays + */ +extern struct file ** alloc_fd_array(int); +extern int expand_fd_array(struct files_struct *, int nr); +extern void free_fd_array(struct file **, int); + +extern fd_set *alloc_fdset(int); +extern int expand_fdset(struct files_struct *, int nr); +extern void free_fdset(fd_set *, int); + +extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); +extern void flush_thread(void); +extern void exit_thread(void); + +extern void exit_mm(struct task_struct *); +extern void exit_files(struct task_struct *); +extern void exit_sighand(struct task_struct *); + +extern void reparent_to_init(void); +extern void daemonize(void); + +extern int do_execve(char *, char **, char **, struct pt_regs *); +extern int do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long); + +extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); +extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); +extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); + +extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +#define __wait_event(wq, condition) \ +do { \ + wait_queue_t __wait; \ + init_waitqueue_entry(&__wait, current); \ + \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + schedule(); \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ +} while (0) + +#define wait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __wait_event(wq, condition); \ +} while (0) + +#define __wait_event_interruptible(wq, condition, ret) \ +do { \ + wait_queue_t __wait; \ + init_waitqueue_entry(&__wait, current); \ + \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (!signal_pending(current)) { \ + schedule(); \ + continue; \ + } \ + ret = -ERESTARTSYS; \ + break; \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ +} while (0) + +#define wait_event_interruptible(wq, condition) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __wait_event_interruptible(wq, condition, __ret); \ + __ret; \ +}) + +#define REMOVE_LINKS(p) do { \ + (p)->next_task->prev_task = (p)->prev_task; \ + (p)->prev_task->next_task = (p)->next_task; \ + if ((p)->p_osptr) \ + (p)->p_osptr->p_ysptr = (p)->p_ysptr; \ + if 
((p)->p_ysptr) \ + (p)->p_ysptr->p_osptr = (p)->p_osptr; \ + else \ + (p)->p_pptr->p_cptr = (p)->p_osptr; \ + } while (0) + +#define SET_LINKS(p) do { \ + (p)->next_task = &init_task; \ + (p)->prev_task = init_task.prev_task; \ + init_task.prev_task->next_task = (p); \ + init_task.prev_task = (p); \ + (p)->p_ysptr = NULL; \ + if (((p)->p_osptr = (p)->p_pptr->p_cptr) != NULL) \ + (p)->p_osptr->p_ysptr = p; \ + (p)->p_pptr->p_cptr = p; \ + } while (0) + +#define for_each_task(p) \ + for (p = &init_task ; (p = p->next_task) != &init_task ; ) + +#define for_each_thread(task) \ + for (task = next_thread(current) ; task != current ; task = next_thread(task)) + +#define next_thread(p) \ + list_entry((p)->thread_group.next, struct task_struct, thread_group) + +#define thread_group_leader(p) (p->pid == p->tgid) + +static inline void del_from_runqueue(struct task_struct * p) +{ + nr_running--; + p->sleep_time = jiffies; + list_del(&p->run_list); + p->run_list.next = NULL; +} + +static inline int task_on_runqueue(struct task_struct *p) +{ + return (p->run_list.next != NULL); +} + +static inline void unhash_process(struct task_struct *p) +{ + if (task_on_runqueue(p)) + out_of_line_bug(); + write_lock_irq(&tasklist_lock); + nr_threads--; + unhash_pid(p); + REMOVE_LINKS(p); + list_del(&p->thread_group); + write_unlock_irq(&tasklist_lock); +} + +/* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */ +static inline void task_lock(struct task_struct *p) +{ + spin_lock(&p->alloc_lock); +} + +static inline void task_unlock(struct task_struct *p) +{ + spin_unlock(&p->alloc_lock); +} + +/* write full pathname into buffer and return start of pathname */ +static inline char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + char *buf, int buflen) +{ + char *res; + struct vfsmount *rootmnt; + struct dentry *root; + read_lock(¤t->fs->lock); + rootmnt = mntget(current->fs->rootmnt); + root = dget(current->fs->root); + read_unlock(¤t->fs->lock); + spin_lock(&dcache_lock); + res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); + spin_unlock(&dcache_lock); + dput(root); + mntput(rootmnt); + return res; +} + +static inline int need_resched(void) +{ + return (unlikely(current->need_resched)); +} + +extern void __cond_resched(void); +static inline void cond_resched(void) +{ + if (need_resched()) + __cond_resched(); +} + +#endif /* __KERNEL__ */ +#endif diff --git a/linux-2.4.28-xen-sparse/include/linux/skbuff.h b/linux-2.4.28-xen-sparse/include/linux/skbuff.h new file mode 100644 index 0000000000..32ba0f8cd0 --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/linux/skbuff.h @@ -0,0 +1,1185 @@ +/* + * Definitions for the 'struct sk_buff' memory handlers. + * + * Authors: + * Alan Cox, <gw4pts@gw4pts.ampr.org> + * Florian La Roche, <rzsfl@rz.uni-sb.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _LINUX_SKBUFF_H +#define _LINUX_SKBUFF_H + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/cache.h> + +#include <asm/atomic.h> +#include <asm/types.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +#define HAVE_ALLOC_SKB /* For the drivers to know */ +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ +#define SLAB_SKB /* Slabified skbuffs */ + +#define CHECKSUM_NONE 0 +#define CHECKSUM_HW 1 +#define CHECKSUM_UNNECESSARY 2 + +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) + +/* A. Checksumming of received packets by device. + * + * NONE: device failed to checksum this packet. + * skb->csum is undefined. + * + * UNNECESSARY: device parsed packet and wouldbe verified checksum. + * skb->csum is undefined. + * It is bad option, but, unfortunately, many of vendors do this. + * Apparently with secret goal to sell you new device, when you + * will add new protocol to your host. F.e. IPv6. 8) + * + * HW: the most generic way. Device supplied checksum of _all_ + * the packet as seen by netif_rx in skb->csum. + * NOTE: Even if device supports only some protocols, but + * is able to produce some skb->csum, it MUST use HW, + * not UNNECESSARY. + * + * B. Checksumming on output. + * + * NONE: skb is checksummed by protocol or csum is not required. + * + * HW: device is required to csum packet as seen by hard_start_xmit + * from skb->h.raw to the end and to record the checksum + * at skb->h.raw+skb->csum. + * + * Device must show its capabilities in dev->features, set + * at device setup time. + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum + * everything. + * NETIF_F_NO_CSUM - loopback or reliable single hop media. + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only + * TCP/UDP over IPv4. Sigh. Vendors like this + * way by an unknown reason. Though, see comment above + * about CHECKSUM_UNNECESSARY. 8) + * + * Any questions? No questions, good. --ANK + */ + +#ifdef __i386__ +#define NET_CALLER(arg) (*(((void**)&arg)-1)) +#else +#define NET_CALLER(arg) __builtin_return_address(0) +#endif + +#ifdef CONFIG_NETFILTER +struct nf_conntrack { + atomic_t use; + void (*destroy)(struct nf_conntrack *); +}; + +struct nf_ct_info { + struct nf_conntrack *master; +}; +#endif + +struct sk_buff_head { + /* These two members must be first. */ + struct sk_buff * next; + struct sk_buff * prev; + + __u32 qlen; + spinlock_t lock; +}; + +struct sk_buff; + +#define MAX_SKB_FRAGS 6 + +typedef struct skb_frag_struct skb_frag_t; + +struct skb_frag_struct +{ + struct page *page; + __u16 page_offset; + __u16 size; +}; + +/* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +struct skb_shared_info { + atomic_t dataref; + unsigned int nr_frags; + struct sk_buff *frag_list; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct sk_buff { + /* These two members must be first. 
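+ *
+ * (Editorial note: the next/prev pair must mirror the layout of struct
+ * sk_buff_head above, because the queue helpers treat the list head as
+ * just another node -- e.g. skb_peek() below returns
+ * ((struct sk_buff *)list_)->next and compares it against the head.)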
*/ + struct sk_buff * next; /* Next buffer in list */ + struct sk_buff * prev; /* Previous buffer in list */ + + struct sk_buff_head * list; /* List we are on */ + struct sock *sk; /* Socket we are owned by */ + struct timeval stamp; /* Time we arrived */ + struct net_device *dev; /* Device we arrived on/are leaving by */ + struct net_device *real_dev; /* For support of point to point protocols + (e.g. 802.3ad) over bonding, we must save the + physical device that got the packet before + replacing skb->dev with the virtual device. */ + + /* Transport layer header */ + union + { + struct tcphdr *th; + struct udphdr *uh; + struct icmphdr *icmph; + struct igmphdr *igmph; + struct iphdr *ipiph; + struct spxhdr *spxh; + unsigned char *raw; + } h; + + /* Network layer header */ + union + { + struct iphdr *iph; + struct ipv6hdr *ipv6h; + struct arphdr *arph; + struct ipxhdr *ipxh; + unsigned char *raw; + } nh; + + /* Link layer header */ + union + { + struct ethhdr *ethernet; + unsigned char *raw; + } mac; + + struct dst_entry *dst; + + /* + * This is the control buffer. It is free to use for every + * layer. Please put your private variables there. If you + * want to keep them across layers you have to do a skb_clone() + * first. This is owned by whoever has the skb queued ATM. + */ + char cb[48]; + + unsigned int len; /* Length of actual data */ + unsigned int data_len; + unsigned int csum; /* Checksum */ + unsigned char __unused, /* Dead field, may be reused */ + cloned, /* head may be cloned (check refcnt to be sure). */ + pkt_type, /* Packet class */ + ip_summed; /* Driver fed us an IP checksum */ + __u32 priority; /* Packet queueing priority */ + atomic_t users; /* User count - see datagram.c,tcp.c */ + unsigned short protocol; /* Packet protocol from driver. */ + unsigned short security; /* Security level of packet */ + unsigned int truesize; /* Buffer size */ + + unsigned char *head; /* Head of buffer */ + unsigned char *data; /* Data head pointer */ + unsigned char *tail; /* Tail pointer */ + unsigned char *end; /* End pointer */ + + void (*destructor)(struct sk_buff *); /* Destruct function */ +#ifdef CONFIG_NETFILTER + /* Can be used for communication between hooks. 
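+	 * (Editorial note: e.g. the iptables MARK target stores a value
+	 * here, which later hooks or mark-based policy routing can key on.)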
*/ + unsigned long nfmark; + /* Cache info */ + __u32 nfcache; + /* Associated connection, if any */ + struct nf_ct_info *nfct; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int nf_debug; +#endif +#endif /*CONFIG_NETFILTER*/ + +#if defined(CONFIG_HIPPI) + union{ + __u32 ifield; + } private; +#endif + +#ifdef CONFIG_NET_SCHED + __u32 tc_index; /* traffic control index */ +#endif +}; + +#ifdef __KERNEL__ +/* + * Handling routines are only of interest to the kernel + */ +#include <linux/slab.h> + +#include <asm/system.h> + +extern void __kfree_skb(struct sk_buff *skb); +extern struct sk_buff * alloc_skb(unsigned int size, int priority); +extern struct sk_buff * alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, int priority); +extern void kfree_skbmem(struct sk_buff *skb); +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, + int newheadroom, + int newtailroom, + int priority); +extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); +#define dev_kfree_skb(a) kfree_skb(a) +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); + +/* Internal */ +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) + +/** + * skb_queue_empty - check if a queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + */ + +static inline int skb_queue_empty(struct sk_buff_head *list) +{ + return (list->next == (struct sk_buff *) list); +} + +/** + * skb_get - reference buffer + * @skb: buffer to reference + * + * Makes another reference to a socket buffer and returns a pointer + * to the buffer. + */ + +static inline struct sk_buff *skb_get(struct sk_buff *skb) +{ + atomic_inc(&skb->users); + return skb; +} + +/* + * If users==1, we are the only owner and are can avoid redundant + * atomic change. + */ + +/** + * kfree_skb - free an sk_buff + * @skb: buffer to free + * + * Drop a reference to the buffer and free it if the usage count has + * hit zero. + */ + +static inline void kfree_skb(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + __kfree_skb(skb); +} + +/* Use this if you didn't touch the skb state [for fast switching] */ +static inline void kfree_skb_fast(struct sk_buff *skb) +{ + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) + kfree_skbmem(skb); +} + +/** + * skb_cloned - is the buffer a clone + * @skb: buffer to check + * + * Returns true if the buffer was generated with skb_clone() and is + * one of multiple shared copies of the buffer. Cloned buffers are + * shared data so must not be written to under normal circumstances. + */ + +static inline int skb_cloned(struct sk_buff *skb) +{ + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; +} + +/** + * skb_shared - is the buffer shared + * @skb: buffer to check + * + * Returns true if more than one person has a reference to this + * buffer. 
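+ *
+ * (Editorial sketch, not part of the original comment) writers normally
+ * go through skb_share_check() below instead of testing this by hand:
+ *
+ *	skb = skb_share_check(skb, GFP_ATOMIC);
+ *	if (skb == NULL)
+ *		return;		<- clone failed, our reference already dropped
+ *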
+ */ + +static inline int skb_shared(struct sk_buff *skb) +{ + return (atomic_read(&skb->users) != 1); +} + +/** + * skb_share_check - check if buffer is shared and if so clone it + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the buffer is shared the buffer is cloned and the old copy + * drops a reference. A new clone with a single reference is returned. + * If the buffer is not shared the original buffer is returned. When + * being called from interrupt status or with spinlocks held pri must + * be GFP_ATOMIC. + * + * NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) +{ + if (skb_shared(skb)) { + struct sk_buff *nskb; + nskb = skb_clone(skb, pri); + kfree_skb(skb); + return nskb; + } + return skb; +} + + +/* + * Copy shared buffers into a new sk_buff. We effectively do COW on + * packets to handle cases where we have a local reader and forward + * and a couple of other messy ones. The normal one is tcpdumping + * a packet thats being forwarded. + */ + +/** + * skb_unshare - make a copy of a shared buffer + * @skb: buffer to check + * @pri: priority for memory allocation + * + * If the socket buffer is a clone then this function creates a new + * copy of the data, drops a reference count on the old copy and returns + * the new copy with the reference count at 1. If the buffer is not a clone + * the original buffer is returned. When called with a spinlock held or + * from interrupt state @pri must be %GFP_ATOMIC + * + * %NULL is returned on a memory allocation failure. + */ + +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) +{ + struct sk_buff *nskb; + if(!skb_cloned(skb)) + return skb; + nskb=skb_copy(skb, pri); + kfree_skb(skb); /* Free our shared copy */ + return nskb; +} + +/** + * skb_peek + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the head element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->next; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_peek_tail + * @list_: list to peek at + * + * Peek an &sk_buff. Unlike most other operations you _MUST_ + * be careful with this one. A peek leaves the buffer on the + * list and someone else may run off with it. You must hold + * the appropriate locks or have a private queue to do this. + * + * Returns %NULL for an empty list or a pointer to the tail element. + * The reference count is not incremented and the reference is therefore + * volatile. Use with caution. + */ + +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) +{ + struct sk_buff *list = ((struct sk_buff *)list_)->prev; + if (list == (struct sk_buff *)list_) + list = NULL; + return list; +} + +/** + * skb_queue_len - get queue length + * @list_: list to measure + * + * Return the length of an &sk_buff queue. 
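+ *
+ * (Editorial sketch; "queue" and "max_backlog" are illustrative names.)
+ * A typical use is bounding a backlog before queueing more work:
+ *
+ *	if (skb_queue_len(&queue) > max_backlog) {
+ *		kfree_skb(skb);
+ *		return;
+ *	}
+ *	skb_queue_tail(&queue, skb);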
+ */ + +static inline __u32 skb_queue_len(struct sk_buff_head *list_) +{ + return(list_->qlen); +} + +static inline void skb_queue_head_init(struct sk_buff_head *list) +{ + spin_lock_init(&list->lock); + list->prev = (struct sk_buff *)list; + list->next = (struct sk_buff *)list; + list->qlen = 0; +} + +/* + * Insert an sk_buff at the start of a list. + * + * The "__skb_xxxx()" functions are the non-atomic ones that + * can only be called with interrupts disabled. + */ + +/** + * __skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + prev = (struct sk_buff *)list; + next = prev->next; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + + +/** + * skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_head(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the end of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + struct sk_buff *prev, *next; + + newsk->list = list; + list->qlen++; + next = (struct sk_buff *)list; + prev = next->prev; + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; +} + +/** + * skb_queue_tail - queue a buffer at the list tail + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the tail of the list. This function takes the + * list lock and can be used safely with other locking &sk_buff functions + * safely. + * + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + __skb_queue_tail(list, newsk); + spin_unlock_irqrestore(&list->lock, flags); +} + +/** + * __skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. This function does not take any locks + * so must be used with appropriate locks held only. The head item is + * returned or %NULL if the list is empty. 
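+ *
+ * (Editorial sketch) with the queue lock already held, a whole list is
+ * drained exactly the way __skb_queue_purge() further below does it:
+ *
+ *	while ((skb = __skb_dequeue(list)) != NULL)
+ *		kfree_skb(skb);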
+ */ + +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) +{ + struct sk_buff *next, *prev, *result; + + prev = (struct sk_buff *) list; + next = prev->next; + result = NULL; + if (next != prev) { + result = next; + next = next->next; + list->qlen--; + next->prev = prev; + prev->next = next; + result->next = NULL; + result->prev = NULL; + result->list = NULL; + } + return result; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The head item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) +{ + unsigned long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +/* + * Insert a packet on a list. + */ + +static inline void __skb_insert(struct sk_buff *newsk, + struct sk_buff * prev, struct sk_buff *next, + struct sk_buff_head * list) +{ + newsk->next = next; + newsk->prev = prev; + next->prev = newsk; + prev->next = newsk; + newsk->list = list; + list->qlen++; +} + +/** + * skb_insert - insert a buffer + * @old: buffer to insert before + * @newsk: buffer to insert + * + * Place a packet before a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * A buffer cannot be placed on two lists at the same time. + */ + +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_insert(newsk, old->prev, old, old->list); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * Place a packet after a given packet in a list. + */ + +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + __skb_insert(newsk, old, old->next, old->list); +} + +/** + * skb_append - append a buffer + * @old: buffer to insert after + * @newsk: buffer to insert + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls. + * A buffer cannot be placed on two lists at the same time. + */ + + +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) +{ + unsigned long flags; + + spin_lock_irqsave(&old->list->lock, flags); + __skb_append(old, newsk); + spin_unlock_irqrestore(&old->list->lock, flags); +} + +/* + * remove sk_buff from list. _Must_ be called atomically, and with + * the list known.. + */ + +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) +{ + struct sk_buff * next, * prev; + + list->qlen--; + next = skb->next; + prev = skb->prev; + skb->next = NULL; + skb->prev = NULL; + skb->list = NULL; + next->prev = prev; + prev->next = next; +} + +/** + * skb_unlink - remove a buffer from a list + * @skb: buffer to remove + * + * Place a packet after a given packet in a list. The list locks are taken + * and this function is atomic with respect to other list locked calls + * + * Works even without knowing the list it is sitting on, which can be + * handy at times. It also means that THE LIST MUST EXIST when you + * unlink. Thus a list must have its contents unlinked before it is + * destroyed. 
+ */ + +static inline void skb_unlink(struct sk_buff *skb) +{ + struct sk_buff_head *list = skb->list; + + if(list) { + unsigned long flags; + + spin_lock_irqsave(&list->lock, flags); + if(skb->list == list) + __skb_unlink(skb, skb->list); + spin_unlock_irqrestore(&list->lock, flags); + } +} + +/* XXX: more streamlined implementation */ + +/** + * __skb_dequeue_tail - remove from the tail of the queue + * @list: list to dequeue from + * + * Remove the tail of the list. This function does not take any locks + * so must be used with appropriate locks held only. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) +{ + struct sk_buff *skb = skb_peek_tail(list); + if (skb) + __skb_unlink(skb, list); + return skb; +} + +/** + * skb_dequeue - remove from the head of the queue + * @list: list to dequeue from + * + * Remove the head of the list. The list lock is taken so the function + * may be used safely with other locking list functions. The tail item is + * returned or %NULL if the list is empty. + */ + +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) +{ + unsigned long flags; + struct sk_buff *result; + + spin_lock_irqsave(&list->lock, flags); + result = __skb_dequeue_tail(list); + spin_unlock_irqrestore(&list->lock, flags); + return result; +} + +static inline int skb_is_nonlinear(const struct sk_buff *skb) +{ + return skb->data_len; +} + +static inline unsigned int skb_headlen(const struct sk_buff *skb) +{ + return skb->len - skb->data_len; +} + +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0) +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) out_of_line_bug(); } while (0) +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) out_of_line_bug(); } while (0) + +/* + * Add data to an sk_buff + */ + +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + return tmp; +} + +/** + * skb_put - add data to a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer. If this would + * exceed the total buffer size the kernel will panic. A pointer to the + * first byte of the extra data is returned. + */ + +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) +{ + unsigned char *tmp=skb->tail; + SKB_LINEAR_ASSERT(skb); + skb->tail+=len; + skb->len+=len; + if(skb->tail>skb->end) { + skb_over_panic(skb, len, current_text_addr()); + } + return tmp; +} + +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + return skb->data; +} + +/** + * skb_push - add data to the start of a buffer + * @skb: buffer to use + * @len: amount of data to add + * + * This function extends the used data area of the buffer at the buffer + * start. If this would exceed the total buffer headroom the kernel will + * panic. A pointer to the first byte of the extra data is returned. 
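+ *
+ * (Editorial sketch; "struct my_hdr" and MY_PROTO are hypothetical.)  The
+ * usual pattern is to skb_reserve() headroom at allocation time and then
+ * prepend headers on the way down the stack:
+ *
+ *	skb_reserve(skb, 16);				<- when allocating
+ *	...
+ *	hdr = (struct my_hdr *)skb_push(skb, sizeof(*hdr));
+ *	hdr->type = htons(MY_PROTO);
+ *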
+ */ + +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) +{ + skb->data-=len; + skb->len+=len; + if(skb->data<skb->head) { + skb_under_panic(skb, len, current_text_addr()); + } + return skb->data; +} + +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) +{ + skb->len-=len; + if (skb->len < skb->data_len) + out_of_line_bug(); + return skb->data+=len; +} + +/** + * skb_pull - remove data from the start of a buffer + * @skb: buffer to use + * @len: amount of data to remove + * + * This function removes data from the start of a buffer, returning + * the memory to the headroom. A pointer to the next data in the buffer + * is returned. Once the data has been pulled future pushes will overwrite + * the old data. + */ + +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __skb_pull(skb,len); +} + +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); + +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb_headlen(skb) && + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) + return NULL; + skb->len -= len; + return skb->data += len; +} + +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) +{ + if (len > skb->len) + return NULL; + return __pskb_pull(skb,len); +} + +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) +{ + if (len <= skb_headlen(skb)) + return 1; + if (len > skb->len) + return 0; + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); +} + +/** + * skb_headroom - bytes at buffer head + * @skb: buffer to check + * + * Return the number of bytes of free space at the head of an &sk_buff. + */ + +static inline int skb_headroom(const struct sk_buff *skb) +{ + return skb->data-skb->head; +} + +/** + * skb_tailroom - bytes at buffer end + * @skb: buffer to check + * + * Return the number of bytes of free space at the tail of an sk_buff + */ + +static inline int skb_tailroom(const struct sk_buff *skb) +{ + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; +} + +/** + * skb_reserve - adjust headroom + * @skb: buffer to alter + * @len: bytes to move + * + * Increase the headroom of an empty &sk_buff by reducing the tail + * room. This is only allowed for an empty buffer. + */ + +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) +{ + skb->data+=len; + skb->tail+=len; +} + +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); + +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + } else { + ___pskb_trim(skb, len, 0); + } +} + +/** + * skb_trim - remove end from a buffer + * @skb: buffer to alter + * @len: new length + * + * Cut the length of a buffer down by removing data from the tail. If + * the buffer is already under the length specified it is not modified. 
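+ *
+ * (Editorial sketch; "pkt_len" is an illustrative name.)  A receive path
+ * that knows the true payload length can strip trailing link-layer
+ * padding with:
+ *
+ *	skb_trim(skb, pkt_len);
+ *
+ * which is a no-op when the buffer is already short enough.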
+ */ + +static inline void skb_trim(struct sk_buff *skb, unsigned int len) +{ + if (skb->len > len) { + __skb_trim(skb, len); + } +} + + +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (!skb->data_len) { + skb->len = len; + skb->tail = skb->data+len; + return 0; + } else { + return ___pskb_trim(skb, len, 1); + } +} + +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) +{ + if (len < skb->len) + return __pskb_trim(skb, len); + return 0; +} + +/** + * skb_orphan - orphan a buffer + * @skb: buffer to orphan + * + * If a buffer currently has an owner then we call the owner's + * destructor function and make the @skb unowned. The buffer continues + * to exist but is no longer charged to its former owner. + */ + + +static inline void skb_orphan(struct sk_buff *skb) +{ + if (skb->destructor) + skb->destructor(skb); + skb->destructor = NULL; + skb->sk = NULL; +} + +/** + * skb_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function takes the list + * lock and is atomic with respect to other list locking functions. + */ + + +static inline void skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __skb_purge - empty a list + * @list: list to empty + * + * Delete all buffers on an &sk_buff list. Each buffer is removed from + * the list and one reference dropped. This function does not take the + * list lock and the caller must hold the relevant locks to use it. + */ + + +static inline void __skb_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + while ((skb=__skb_dequeue(list))!=NULL) + kfree_skb(skb); +} + +/** + * __dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned in there is no free memory. + */ +#ifndef CONFIG_XEN +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, + int gfp_mask) +{ + struct sk_buff *skb = alloc_skb(length+16, gfp_mask); + if (skb) + skb_reserve(skb,16); + return skb; +} +#else +extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); +#endif + +/** + * dev_alloc_skb - allocate an skbuff for sending + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned in there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ + +static inline struct sk_buff *dev_alloc_skb(unsigned int length) +{ + return __dev_alloc_skb(length, GFP_ATOMIC); +} + +/** + * skb_cow - copy header of skb when it is required + * @skb: buffer to cow + * @headroom: needed headroom + * + * If the skb passed lacks sufficient headroom or its data part + * is shared, data is reallocated. If reallocation fails, an error + * is returned and original skb is not changed. 
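+ *
+ * (Editorial sketch; "dev" and the "drop" label are illustrative.)  Code
+ * about to rewrite a header in place first makes the data private and
+ * roomy enough:
+ *
+ *	if (skb_cow(skb, dev->hard_header_len))
+ *		goto drop;		<- allocation failed, skb unchanged
+ *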
+ * + * The result is skb with writable area skb->head...skb->tail + * and at least @headroom of space at head. + */ + +static inline int +skb_cow(struct sk_buff *skb, unsigned int headroom) +{ + int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + + if (delta < 0) + delta = 0; + + if (delta || skb_cloned(skb)) + return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); + return 0; +} + +/** + * skb_padto - pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Returns the buffer, which may be a replacement + * for the original, or NULL for out of memory - in which case + * the original buffer is still freed. + */ + +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + if(likely(size >= len)) + return skb; + return skb_pad(skb, len-size); +} + +/** + * skb_linearize - convert paged skb to linear one + * @skb: buffer to linarize + * @gfp: allocation mode + * + * If there is no free memory -ENOMEM is returned, otherwise zero + * is returned and the old skb data released. */ +int skb_linearize(struct sk_buff *skb, int gfp); + +static inline void *kmap_skb_frag(const skb_frag_t *frag) +{ +#ifdef CONFIG_HIGHMEM + if (in_irq()) + out_of_line_bug(); + + local_bh_disable(); +#endif + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); +} + +static inline void kunmap_skb_frag(void *vaddr) +{ + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); +#ifdef CONFIG_HIGHMEM + local_bh_enable(); +#endif +} + +#define skb_queue_walk(queue, skb) \ + for (skb = (queue)->next; \ + (skb != (struct sk_buff *)(queue)); \ + skb=skb->next) + + +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); + +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); + +extern void skb_init(void); +extern void skb_add_mtu(int mtu); + +#ifdef CONFIG_NETFILTER +static inline void +nf_conntrack_put(struct nf_ct_info *nfct) +{ + if (nfct && atomic_dec_and_test(&nfct->master->use)) + nfct->master->destroy(nfct->master); +} +static inline void +nf_conntrack_get(struct nf_ct_info *nfct) +{ + if (nfct) + atomic_inc(&nfct->master->use); +} +static inline void +nf_reset(struct sk_buff *skb) +{ + nf_conntrack_put(skb->nfct); + skb->nfct = NULL; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif +} +#else /* CONFIG_NETFILTER */ +static inline void nf_reset(struct sk_buff *skb) {} +#endif /* CONFIG_NETFILTER */ + +#endif /* __KERNEL__ */ +#endif /* 
_LINUX_SKBUFF_H */ diff --git a/linux-2.4.28-xen-sparse/include/linux/timer.h b/linux-2.4.28-xen-sparse/include/linux/timer.h new file mode 100644 index 0000000000..238083218f --- /dev/null +++ b/linux-2.4.28-xen-sparse/include/linux/timer.h @@ -0,0 +1,77 @@ +#ifndef _LINUX_TIMER_H +#define _LINUX_TIMER_H + +#include <linux/config.h> +#include <linux/list.h> + +/* + * In Linux 2.4, static timers have been removed from the kernel. + * Timers may be dynamically created and destroyed, and should be initialized + * by a call to init_timer() upon creation. + * + * The "data" field enables use of a common timeout function for several + * timeouts. You can use this field to distinguish between the different + * invocations. + */ +struct timer_list { + struct list_head list; + unsigned long expires; + unsigned long data; + void (*function)(unsigned long); +}; + +extern void add_timer(struct timer_list * timer); +extern int del_timer(struct timer_list * timer); +#ifdef CONFIG_NO_IDLE_HZ +extern struct timer_list *next_timer_event(void); +#endif + +#ifdef CONFIG_SMP +extern int del_timer_sync(struct timer_list * timer); +extern void sync_timers(void); +#else +#define del_timer_sync(t) del_timer(t) +#define sync_timers() do { } while (0) +#endif + +/* + * mod_timer is a more efficient way to update the expire field of an + * active timer (if the timer is inactive it will be activated) + * mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a). + * If the timer is known to be not pending (ie, in the handler), mod_timer + * is less efficient than a->expires = b; add_timer(a). + */ +int mod_timer(struct timer_list *timer, unsigned long expires); + +extern void it_real_fn(unsigned long); + +static inline void init_timer(struct timer_list * timer) +{ + timer->list.next = timer->list.prev = NULL; +} + +static inline int timer_pending (const struct timer_list * timer) +{ + return timer->list.next != NULL; +} + +/* + * These inlines deal with timer wrapping correctly. You are + * strongly encouraged to use them + * 1. Because people otherwise forget + * 2. Because if the timer wrap changes in future you wont have to + * alter your driver code. + * + * time_after(a,b) returns true if the time a is after time b. + * + * Do this with "<0" and ">=0" to only test the sign of the result. A + * good compiler would generate better code (and a really good compiler + * wouldn't care). Gcc is currently neither. + */ +#define time_after(a,b) ((long)(b) - (long)(a) < 0) +#define time_before(a,b) time_after(b,a) + +#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) +#define time_before_eq(a,b) time_after_eq(b,a) + +#endif |
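
Editorial appendix (not part of the patch above): a minimal sketch of how the timer interface declared in the timer.h hunk is normally used. `struct my_dev`, `my_timeout` and the one-second period are illustrative assumptions, not anything defined by the patch.

    #include <linux/timer.h>
    #include <linux/sched.h>		/* jiffies, HZ */

    struct my_dev {			/* hypothetical driver state */
    	struct timer_list watchdog;
    };

    static void my_timeout(unsigned long data)
    {
    	struct my_dev *dev = (struct my_dev *)data;	/* "data" picks out the instance */

    	/* ... handle the timeout, then (optionally) re-arm ... */
    	mod_timer(&dev->watchdog, jiffies + HZ);
    }

    static void my_dev_start(struct my_dev *dev)
    {
    	init_timer(&dev->watchdog);		/* must precede first use */
    	dev->watchdog.function = my_timeout;
    	dev->watchdog.data     = (unsigned long)dev;
    	dev->watchdog.expires  = jiffies + HZ;	/* roughly one second from now */
    	add_timer(&dev->watchdog);
    }

    static void my_dev_stop(struct my_dev *dev)
    {
    	del_timer_sync(&dev->watchdog);		/* falls back to del_timer() on UP */
    }

As the comment at the end of the hunk notes, expiry tests against jiffies should use time_after()/time_before() rather than plain comparisons so that jiffies wrap-around is handled correctly.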