diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-03-27 15:29:57 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-03-27 15:29:57 +0000 |
commit | 72d7f29fc5513bb48014b5fb8f19698703e538ca (patch) | |
tree | 2dbe24c7a830b9c34fb816da221ddfb032afb1c1 /xenolinux-2.4.25-sparse | |
parent | cc084c5cf73b9cd20ff6d75376ad61997fbfa8df (diff) | |
download | xen-72d7f29fc5513bb48014b5fb8f19698703e538ca.tar.gz xen-72d7f29fc5513bb48014b5fb8f19698703e538ca.tar.bz2 xen-72d7f29fc5513bb48014b5fb8f19698703e538ca.zip |
bitkeeper revision 1.825.3.12 (40659df50NIJI5Ld3gK593_2UZJDuQ)
xor.h:
new file
Many files:
Bug fixes for hardware virtualisation.
Diffstat (limited to 'xenolinux-2.4.25-sparse')
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev | 49 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c | 12 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c | 8 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/include/asm-xen/system.h | 23 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/include/asm-xen/xor.h | 879 | ||||
-rwxr-xr-x | xenolinux-2.4.25-sparse/mkbuildtree | 1 |
6 files changed, 941 insertions, 31 deletions
diff --git a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev index 3902755869..52922f5bf1 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev @@ -17,7 +17,7 @@ CONFIG_NO_IDLE_HZ=y # # Code maturity level options # -# CONFIG_EXPERIMENTAL is not set +CONFIG_EXPERIMENTAL=y # # Loadable module support @@ -88,6 +88,8 @@ CONFIG_BINFMT_ELF=y # CONFIG_PARPORT=y CONFIG_PARPORT_PC=y +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set # CONFIG_PARPORT_PC_PCMCIA is not set # CONFIG_PARPORT_AMIGA is not set # CONFIG_PARPORT_MFC3 is not set @@ -153,6 +155,7 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_IP_PNP_RARP is not set # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set # CONFIG_INET_ECN is not set # CONFIG_SYN_COOKIES is not set @@ -164,6 +167,7 @@ CONFIG_IP_NF_FTP=y # CONFIG_IP_NF_AMANDA is not set CONFIG_IP_NF_TFTP=y CONFIG_IP_NF_IRC=y +# CONFIG_IP_NF_QUEUE is not set CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_LIMIT is not set # CONFIG_IP_NF_MATCH_MAC is not set @@ -181,13 +185,17 @@ CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_HELPER is not set CONFIG_IP_NF_MATCH_STATE=y CONFIG_IP_NF_MATCH_CONNTRACK=y +# CONFIG_IP_NF_MATCH_UNCLEAN is not set +# CONFIG_IP_NF_MATCH_OWNER is not set CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y +# CONFIG_IP_NF_TARGET_MIRROR is not set CONFIG_IP_NF_NAT=y CONFIG_IP_NF_NAT_NEEDED=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_REDIRECT=y # CONFIG_IP_NF_NAT_LOCAL is not set +# CONFIG_IP_NF_NAT_SNMP_BASIC is not set CONFIG_IP_NF_NAT_IRC=y CONFIG_IP_NF_NAT_FTP=y CONFIG_IP_NF_NAT_TFTP=y @@ -201,6 +209,15 @@ CONFIG_IP_NF_TARGET_ULOG=y # IP: Virtual Server Configuration # # CONFIG_IP_VS is not set +# CONFIG_IPV6 is not set +# CONFIG_KHTTPD is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +CONFIG_IPV6_SCTP__=y +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set # CONFIG_VLAN_8021Q is not set # @@ -215,6 +232,14 @@ CONFIG_IP_NF_TARGET_ULOG=y # CONFIG_DEV_APPLETALK is not set # CONFIG_DECNET is not set # CONFIG_BRIDGE is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set # # QoS and/or fair queueing @@ -348,6 +373,7 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_AHA152X=y CONFIG_SCSI_AHA1542=y CONFIG_SCSI_AHA1740=y +CONFIG_SCSI_AACRAID=y # CONFIG_SCSI_AIC7XXX is not set CONFIG_SCSI_AIC79XX=y CONFIG_AIC79XX_CMDS_PER_DEVICE=32 @@ -406,6 +432,7 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_SCSI_U14_34F is not set # CONFIG_SCSI_ULTRASTOR is not set # CONFIG_SCSI_NSP32 is not set +# CONFIG_SCSI_DEBUG is not set # # Fusion MPT device support @@ -417,6 +444,11 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 # CONFIG_FUSION_LAN is not set # +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +# CONFIG_IEEE1394 is not set + +# # I2O device support # # CONFIG_I2O is not set @@ -439,6 +471,7 @@ CONFIG_NETDEVICES=y # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set +# CONFIG_ETHERTAP is not set # # Ethernet (10 or 100Mbit) @@ -484,8 +517,9 @@ CONFIG_E1000=y # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set # CONFIG_SK98LIN is not set -# CONFIG_TIGON3 is not set +CONFIG_TIGON3=y # CONFIG_FDDI is not set +# CONFIG_HIPPI is not set # CONFIG_PLIP is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -500,6 +534,8 @@ CONFIG_E1000=y # # CONFIG_TR is not set # CONFIG_NET_FC is not set +# CONFIG_RCPCI is not set +# CONFIG_SHAPER is not set # # Wan interfaces @@ -599,6 +635,7 @@ CONFIG_PSMOUSE=y # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set # # Ftape, the floppy tape device driver @@ -778,6 +815,12 @@ CONFIG_XEN_CONSOLE=y CONFIG_VGA_CONSOLE=y CONFIG_DUMMY_CONSOLE=y # CONFIG_VIDEO_SELECT is not set +# CONFIG_MDA_CONSOLE is not set + +# +# Frame-buffer support +# +# CONFIG_FB is not set # # Sound @@ -811,6 +854,7 @@ CONFIG_USB_OHCI=y # # CONFIG_USB_AUDIO is not set # CONFIG_USB_EMI26 is not set +# CONFIG_USB_BLUETOOTH is not set # CONFIG_USB_MIDI is not set # CONFIG_USB_STORAGE is not set # CONFIG_USB_STORAGE_DEBUG is not set @@ -924,3 +968,4 @@ CONFIG_LOG_BUF_SHIFT=0 # CONFIG_CRC32 is not set CONFIG_ZLIB_INFLATE=y # CONFIG_ZLIB_DEFLATE is not set +# CONFIG_FW_LOADER is not set diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c index fc3efb810e..e46307f33d 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c @@ -84,7 +84,7 @@ static int find_unbound_irq(void) break; if ( irq == NR_IRQS ) - BUG(); + panic("No available IRQ to bind to: increase NR_IRQS!\n"); return irq; } @@ -101,7 +101,7 @@ int bind_virq_to_irq(int virq) op.cmd = EVTCHNOP_bind_virq; op.u.bind_virq.virq = virq; if ( HYPERVISOR_event_channel_op(&op) != 0 ) - BUG(); + panic("Failed to bind virtual IRQ %d\n", virq); evtchn = op.u.bind_virq.port; irq = find_unbound_irq(); @@ -132,7 +132,7 @@ void unbind_virq_from_irq(int virq) op.u.close.dom = DOMID_SELF; op.u.close.port = evtchn; if ( HYPERVISOR_event_channel_op(&op) != 0 ) - BUG(); + panic("Failed to unbind virtual IRQ %d\n", virq); evtchn_to_irq[evtchn] = -1; irq_to_evtchn[irq] = -1; @@ -241,8 +241,8 @@ static unsigned int startup_pirq(unsigned int irq) op.cmd = EVTCHNOP_bind_pirq; op.u.bind_pirq.pirq = irq; if ( HYPERVISOR_event_channel_op(&op) != 0 ) - BUG(); - evtchn = op.u.bind_virq.port; + panic("Failed to obtain physical IRQ %d\n", irq); + evtchn = op.u.bind_pirq.port; evtchn_to_irq[evtchn] = irq; irq_to_evtchn[irq] = evtchn; @@ -264,7 +264,7 @@ static void shutdown_pirq(unsigned int irq) op.u.close.dom = DOMID_SELF; op.u.close.port = evtchn; if ( HYPERVISOR_event_channel_op(&op) != 0 ) - BUG(); + panic("Failed to unbind physical IRQ %d\n", irq); evtchn_to_irq[evtchn] = -1; irq_to_evtchn[irq] = -1; diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c index 63288fc282..b98e0cc6bb 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c @@ -549,6 +549,14 @@ asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs, */ asmlinkage void math_state_restore(struct pt_regs regs) { + /* + * A trap in kernel mode can be ignored. It'll be the fast XOR or + * copying libraries, which will correctly save/restore state and + * reset the TS bit in CR0. + */ + if ( (regs.xcs & 2) == 0 ) + return; + if (current->used_math) { restore_fpu(current); } else { diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/system.h b/xenolinux-2.4.25-sparse/include/asm-xen/system.h index 28fbaeca82..77b325d61a 100644 --- a/xenolinux-2.4.25-sparse/include/asm-xen/system.h +++ b/xenolinux-2.4.25-sparse/include/asm-xen/system.h @@ -107,33 +107,12 @@ static inline unsigned long _get_base(char * addr) ".previous" \ : :"m" (*(unsigned int *)&(value))) +/* NB. 'clts' is done for us by Xen during virtual trap. */ #define clts() ((void)0) -#define read_cr0() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr0,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr0(x) \ - __asm__("movl %0,%%cr0": :"r" (x)); - -#define read_cr4() ({ \ - unsigned int __dummy; \ - __asm__( \ - "movl %%cr4,%0\n\t" \ - :"=r" (__dummy)); \ - __dummy; \ -}) -#define write_cr4(x) \ - __asm__("movl %0,%%cr4": :"r" (x)); #define stts() (HYPERVISOR_fpu_taskswitch()) #endif /* __KERNEL__ */ -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory"); - static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; diff --git a/xenolinux-2.4.25-sparse/include/asm-xen/xor.h b/xenolinux-2.4.25-sparse/include/asm-xen/xor.h new file mode 100644 index 0000000000..9e6cca8a8a --- /dev/null +++ b/xenolinux-2.4.25-sparse/include/asm-xen/xor.h @@ -0,0 +1,879 @@ +/* + * include/asm-i386/xor.h + * + * Optimized RAID-5 checksumming functions for MMX and SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * High-speed RAID5 checksumming functions utilizing MMX instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +#define FPU_SAVE \ + do { \ + if (!(current->flags & PF_USEDFPU)) \ + clts(); \ + __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ + } while (0) + +#define FPU_RESTORE \ + do { \ + __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ + if (!(current->flags & PF_USEDFPU)) \ + stts(); \ + } while (0) + +#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" +#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" +#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" +#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" +#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" +#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" + + +static void +xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + ST(i,0) \ + XO1(i+1,1) \ + ST(i+1,1) \ + XO1(i+2,2) \ + ST(i+2,2) \ + XO1(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + ST(i,0) \ + XO2(i+1,1) \ + ST(i+1,1) \ + XO2(i+2,2) \ + ST(i+2,2) \ + XO2(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + ST(i,0) \ + XO3(i+1,1) \ + ST(i+1,1) \ + XO3(i+2,2) \ + ST(i+2,2) \ + XO3(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + FPU_RESTORE; +} + + +static void +xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 7; + char fpu_save[108]; + + FPU_SAVE; + + /* need to save/restore p4/p5 manually otherwise gcc's 10 argument + limit gets exceeded (+ counts as two arguments) */ + __asm__ __volatile__ ( + " pushl %4\n" + " pushl %5\n" +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + XO4(i,0) \ + ST(i,0) \ + XO4(i+1,1) \ + ST(i+1,1) \ + XO4(i+2,2) \ + ST(i+2,2) \ + XO4(i+3,3) \ + ST(i+3,3) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $128, %1 ;\n" + " addl $128, %2 ;\n" + " addl $128, %3 ;\n" + " addl $128, %4 ;\n" + " addl $128, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + " popl %5\n" + " popl %4\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + FPU_RESTORE; +} + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef BLOCK + +static void +xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq 40(%1), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : + : "memory" ); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + __asm__ __volatile__ ( + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor (%4), %%mm0 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " movq 32(%1), %%mm4 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " pxor 16(%4), %%mm2 ;\n" + " movq %%mm1, 8(%1) ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 40(%2), %%mm5 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 24(%4), %%mm3 ;\n" + " movq %%mm3, 24(%1) ;\n" + " movq 56(%1), %%mm7 ;\n" + " movq 48(%1), %%mm6 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 56(%2), %%mm7 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory"); + + FPU_RESTORE; +} + +static void +xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 6; + char fpu_save[108]; + + FPU_SAVE; + + /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ + __asm__ __volatile__ ( + " pushl %4\n" + " pushl %5\n" + " .align 32,0x90 ;\n" + " 1: ;\n" + " movq (%1), %%mm0 ;\n" + " movq 8(%1), %%mm1 ;\n" + " pxor (%2), %%mm0 ;\n" + " pxor 8(%2), %%mm1 ;\n" + " movq 16(%1), %%mm2 ;\n" + " pxor (%3), %%mm0 ;\n" + " pxor 8(%3), %%mm1 ;\n" + " pxor 16(%2), %%mm2 ;\n" + " pxor (%4), %%mm0 ;\n" + " pxor 8(%4), %%mm1 ;\n" + " pxor 16(%3), %%mm2 ;\n" + " movq 24(%1), %%mm3 ;\n" + " pxor (%5), %%mm0 ;\n" + " pxor 8(%5), %%mm1 ;\n" + " movq %%mm0, (%1) ;\n" + " pxor 16(%4), %%mm2 ;\n" + " pxor 24(%2), %%mm3 ;\n" + " movq %%mm1, 8(%1) ;\n" + " pxor 16(%5), %%mm2 ;\n" + " pxor 24(%3), %%mm3 ;\n" + " movq 32(%1), %%mm4 ;\n" + " movq %%mm2, 16(%1) ;\n" + " pxor 24(%4), %%mm3 ;\n" + " pxor 32(%2), %%mm4 ;\n" + " movq 40(%1), %%mm5 ;\n" + " pxor 24(%5), %%mm3 ;\n" + " pxor 32(%3), %%mm4 ;\n" + " pxor 40(%2), %%mm5 ;\n" + " movq %%mm3, 24(%1) ;\n" + " pxor 32(%4), %%mm4 ;\n" + " pxor 40(%3), %%mm5 ;\n" + " movq 48(%1), %%mm6 ;\n" + " movq 56(%1), %%mm7 ;\n" + " pxor 32(%5), %%mm4 ;\n" + " pxor 40(%4), %%mm5 ;\n" + " pxor 48(%2), %%mm6 ;\n" + " pxor 56(%2), %%mm7 ;\n" + " movq %%mm4, 32(%1) ;\n" + " pxor 48(%3), %%mm6 ;\n" + " pxor 56(%3), %%mm7 ;\n" + " pxor 40(%5), %%mm5 ;\n" + " pxor 48(%4), %%mm6 ;\n" + " pxor 56(%4), %%mm7 ;\n" + " movq %%mm5, 40(%1) ;\n" + " pxor 48(%5), %%mm6 ;\n" + " pxor 56(%5), %%mm7 ;\n" + " movq %%mm6, 48(%1) ;\n" + " movq %%mm7, 56(%1) ;\n" + + " addl $64, %1 ;\n" + " addl $64, %2 ;\n" + " addl $64, %3 ;\n" + " addl $64, %4 ;\n" + " addl $64, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + " popl %5\n" + " popl %4\n" + : "+g" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + FPU_RESTORE; +} + +static struct xor_block_template xor_block_pII_mmx = { + name: "pII_mmx", + do_2: xor_pII_mmx_2, + do_3: xor_pII_mmx_3, + do_4: xor_pII_mmx_4, + do_5: xor_pII_mmx_5, +}; + +static struct xor_block_template xor_block_p5_mmx = { + name: "p5_mmx", + do_2: xor_p5_mmx_2, + do_3: xor_p5_mmx_3, + do_4: xor_p5_mmx_4, + do_5: xor_p5_mmx_5, +}; + +#undef FPU_SAVE +#undef FPU_RESTORE + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +#define XMMS_SAVE \ + if (!(current->flags & PF_USEDFPU)) \ + clts(); \ + __asm__ __volatile__ ( \ + "movups %%xmm0,(%1) ;\n\t" \ + "movups %%xmm1,0x10(%1) ;\n\t" \ + "movups %%xmm2,0x20(%1) ;\n\t" \ + "movups %%xmm3,0x30(%1) ;\n\t" \ + : "=&r" (cr0) \ + : "r" (xmm_save) \ + : "memory") + +#define XMMS_RESTORE \ + __asm__ __volatile__ ( \ + "sfence ;\n\t" \ + "movups (%1),%%xmm0 ;\n\t" \ + "movups 0x10(%1),%%xmm1 ;\n\t" \ + "movups 0x20(%1),%%xmm2 ;\n\t" \ + "movups 0x30(%1),%%xmm3 ;\n\t" \ + : \ + : "r" (cr0), "r" (xmm_save) \ + : "memory"); \ + if (!(current->flags & PF_USEDFPU)) \ + stts() + +#define ALIGN16 __attribute__((aligned(16))) + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" +#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" +#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" +#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" +#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" +#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" +#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" +#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" +#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" + + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + LD(i,0) \ + LD(i+1,1) \ + PF1(i) \ + PF1(i+2) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF0(i+4) \ + PF0(i+6) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2) + : + : "memory"); + + XMMS_RESTORE; +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r"(p2), "+r"(p3) + : + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + __asm__ __volatile__ ( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + PF3(i) \ + PF3(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) + : + : "memory" ); + + XMMS_RESTORE; +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + char xmm_save[16*4] ALIGN16; + int cr0; + + XMMS_SAVE; + + /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ + __asm__ __volatile__ ( + " pushl %4\n" + " pushl %5\n" +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i+2) \ + LD(i,0) \ + LD(i+1,1) \ + LD(i+2,2) \ + LD(i+3,3) \ + PF2(i) \ + PF2(i+2) \ + XO1(i,0) \ + XO1(i+1,1) \ + XO1(i+2,2) \ + XO1(i+3,3) \ + PF3(i) \ + PF3(i+2) \ + XO2(i,0) \ + XO2(i+1,1) \ + XO2(i+2,2) \ + XO2(i+3,3) \ + PF4(i) \ + PF4(i+2) \ + PF0(i+4) \ + PF0(i+6) \ + XO3(i,0) \ + XO3(i+1,1) \ + XO3(i+2,2) \ + XO3(i+3,3) \ + XO4(i,0) \ + XO4(i+1,1) \ + XO4(i+2,2) \ + XO4(i+3,3) \ + ST(i,0) \ + ST(i+1,1) \ + ST(i+2,2) \ + ST(i+3,3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " addl $256, %1 ;\n" + " addl $256, %2 ;\n" + " addl $256, %3 ;\n" + " addl $256, %4 ;\n" + " addl $256, %5 ;\n" + " decl %0 ;\n" + " jnz 1b ;\n" + " popl %5\n" + " popl %4\n" + : "+r" (lines), + "+r" (p1), "+r" (p2), "+r" (p3) + : "r" (p4), "r" (p5) + : "memory"); + + XMMS_RESTORE; +} + +static struct xor_block_template xor_block_pIII_sse = { + name: "pIII_sse", + do_2: xor_sse_2, + do_3: xor_sse_3, + do_4: xor_sse_4, + do_5: xor_sse_5, +}; + +/* Also try the generic routines. */ +#include <asm-generic/xor.h> + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_32regs); \ + if (cpu_has_xmm) \ + xor_speed(&xor_block_pIII_sse); \ + if (md_cpu_has_mmx()) { \ + xor_speed(&xor_block_pII_mmx); \ + xor_speed(&xor_block_p5_mmx); \ + } \ + } while (0) + +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +#define XOR_SELECT_TEMPLATE(FASTEST) \ + (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) diff --git a/xenolinux-2.4.25-sparse/mkbuildtree b/xenolinux-2.4.25-sparse/mkbuildtree index 08b6ae1e5d..b6679d05df 100755 --- a/xenolinux-2.4.25-sparse/mkbuildtree +++ b/xenolinux-2.4.25-sparse/mkbuildtree @@ -196,7 +196,6 @@ ln -sf ../asm-i386/ucontext.h ln -sf ../asm-i386/unaligned.h ln -sf ../asm-i386/unistd.h ln -sf ../asm-i386/user.h -ln -sf ../asm-i386/xor.h cd ../../arch/xen/kernel ln -sf ../../i386/kernel/i387.c |