ia64/xen-unstable
changeset 1251:76ba37dc1b89
bitkeeper revision 1.825.3.12 (40659df50NIJI5Ld3gK593_2UZJDuQ)
xor.h:
new file
Many files:
Bug fixes for hardware virtualisation.
xor.h:
new file
Many files:
Bug fixes for hardware virtualisation.
field | value |
---|---|
author | kaf24@scramble.cl.cam.ac.uk |
date | Sat Mar 27 15:29:57 2004 +0000 (2004-03-27) |
parents | b2c2c29b1e99 |
children | 771b47a23e3d |
files | .rootkeys xen/arch/i386/irq.c xen/common/event_channel.c xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c xenolinux-2.4.25-sparse/include/asm-xen/system.h xenolinux-2.4.25-sparse/include/asm-xen/xor.h xenolinux-2.4.25-sparse/mkbuildtree |
line diff
1.1 --- a/.rootkeys Sat Mar 27 13:51:01 2004 +0000 1.2 +++ b/.rootkeys Sat Mar 27 15:29:57 2004 +0000 1.3 @@ -689,6 +689,7 @@ 4062f7e2PzFOUGT0PaE7A0VprTU3JQ xenolinux 1.4 3e5a4e68mTr0zcp9SXDbnd-XLrrfxw xenolinux-2.4.25-sparse/include/asm-xen/system.h 1.5 3f1056a9L_kqHcFheV00KbKBzv9j5w xenolinux-2.4.25-sparse/include/asm-xen/vga.h 1.6 3f689063nhrIRsMMZjZxMFk7iEINqQ xenolinux-2.4.25-sparse/include/asm-xen/xen_proc.h 1.7 +40659defgWA92arexpMGn8X3QMDj3w xenolinux-2.4.25-sparse/include/asm-xen/xor.h 1.8 3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.25-sparse/include/linux/blk.h 1.9 3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.25-sparse/include/linux/major.h 1.10 401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.25-sparse/include/linux/sched.h
2.1 --- a/xen/arch/i386/irq.c Sat Mar 27 13:51:01 2004 +0000 2.2 +++ b/xen/arch/i386/irq.c Sat Mar 27 15:29:57 2004 +0000 2.3 @@ -1005,12 +1005,19 @@ int pirq_guest_bind(struct task_struct * 2.4 { 2.5 rc = -EBUSY; 2.6 if ( desc->action != NULL ) 2.7 + { 2.8 + DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n", 2.9 + irq, desc->action->name); 2.10 goto out; 2.11 + } 2.12 2.13 rc = -ENOMEM; 2.14 action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL); 2.15 if ( (desc->action = (struct irqaction *)action) == NULL ) 2.16 + { 2.17 + DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq); 2.18 goto out; 2.19 + } 2.20 2.21 action->nr_guests = 0; 2.22 action->in_flight = 0; 2.23 @@ -1025,10 +1032,15 @@ int pirq_guest_bind(struct task_struct * 2.24 2.25 rc = -EBUSY; 2.26 if ( action->nr_guests == IRQ_MAX_GUESTS ) 2.27 + { 2.28 + DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq); 2.29 goto out; 2.30 + } 2.31 2.32 action->guest[action->nr_guests++] = p; 2.33 2.34 + rc = 0; 2.35 + 2.36 out: 2.37 spin_unlock_irqrestore(&desc->lock, flags); 2.38 return rc;
3.1 --- a/xen/common/event_channel.c Sat Mar 27 13:51:01 2004 +0000 3.2 +++ b/xen/common/event_channel.c Sat Mar 27 15:29:57 2004 +0000 3.3 @@ -197,6 +197,7 @@ static long evtchn_bind_pirq(evtchn_bind 3.4 if ( (rc = pirq_guest_bind(p, pirq)) != 0 ) 3.5 { 3.6 p->pirq_to_evtchn[pirq] = 0; 3.7 + DPRINTK("Couldn't bind to PIRQ %d (error=%d)\n", pirq, rc); 3.8 goto out; 3.9 } 3.10
4.1 --- a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev Sat Mar 27 13:51:01 2004 +0000 4.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev Sat Mar 27 15:29:57 2004 +0000 4.3 @@ -17,7 +17,7 @@ CONFIG_NO_IDLE_HZ=y 4.4 # 4.5 # Code maturity level options 4.6 # 4.7 -# CONFIG_EXPERIMENTAL is not set 4.8 +CONFIG_EXPERIMENTAL=y 4.9 4.10 # 4.11 # Loadable module support 4.12 @@ -88,6 +88,8 @@ CONFIG_BINFMT_ELF=y 4.13 # 4.14 CONFIG_PARPORT=y 4.15 CONFIG_PARPORT_PC=y 4.16 +# CONFIG_PARPORT_PC_FIFO is not set 4.17 +# CONFIG_PARPORT_PC_SUPERIO is not set 4.18 # CONFIG_PARPORT_PC_PCMCIA is not set 4.19 # CONFIG_PARPORT_AMIGA is not set 4.20 # CONFIG_PARPORT_MFC3 is not set 4.21 @@ -153,6 +155,7 @@ CONFIG_IP_PNP_DHCP=y 4.22 # CONFIG_IP_PNP_RARP is not set 4.23 # CONFIG_NET_IPIP is not set 4.24 # CONFIG_NET_IPGRE is not set 4.25 +# CONFIG_ARPD is not set 4.26 # CONFIG_INET_ECN is not set 4.27 # CONFIG_SYN_COOKIES is not set 4.28 4.29 @@ -164,6 +167,7 @@ CONFIG_IP_NF_FTP=y 4.30 # CONFIG_IP_NF_AMANDA is not set 4.31 CONFIG_IP_NF_TFTP=y 4.32 CONFIG_IP_NF_IRC=y 4.33 +# CONFIG_IP_NF_QUEUE is not set 4.34 CONFIG_IP_NF_IPTABLES=y 4.35 # CONFIG_IP_NF_MATCH_LIMIT is not set 4.36 # CONFIG_IP_NF_MATCH_MAC is not set 4.37 @@ -181,13 +185,17 @@ CONFIG_IP_NF_IPTABLES=y 4.38 # CONFIG_IP_NF_MATCH_HELPER is not set 4.39 CONFIG_IP_NF_MATCH_STATE=y 4.40 CONFIG_IP_NF_MATCH_CONNTRACK=y 4.41 +# CONFIG_IP_NF_MATCH_UNCLEAN is not set 4.42 +# CONFIG_IP_NF_MATCH_OWNER is not set 4.43 CONFIG_IP_NF_FILTER=y 4.44 CONFIG_IP_NF_TARGET_REJECT=y 4.45 +# CONFIG_IP_NF_TARGET_MIRROR is not set 4.46 CONFIG_IP_NF_NAT=y 4.47 CONFIG_IP_NF_NAT_NEEDED=y 4.48 CONFIG_IP_NF_TARGET_MASQUERADE=y 4.49 CONFIG_IP_NF_TARGET_REDIRECT=y 4.50 # CONFIG_IP_NF_NAT_LOCAL is not set 4.51 +# CONFIG_IP_NF_NAT_SNMP_BASIC is not set 4.52 CONFIG_IP_NF_NAT_IRC=y 4.53 CONFIG_IP_NF_NAT_FTP=y 4.54 CONFIG_IP_NF_NAT_TFTP=y 4.55 @@ -201,6 +209,15 @@ CONFIG_IP_NF_TARGET_ULOG=y 4.56 # IP: Virtual Server Configuration 4.57 # 4.58 # 
CONFIG_IP_VS is not set 4.59 +# CONFIG_IPV6 is not set 4.60 +# CONFIG_KHTTPD is not set 4.61 + 4.62 +# 4.63 +# SCTP Configuration (EXPERIMENTAL) 4.64 +# 4.65 +CONFIG_IPV6_SCTP__=y 4.66 +# CONFIG_IP_SCTP is not set 4.67 +# CONFIG_ATM is not set 4.68 # CONFIG_VLAN_8021Q is not set 4.69 4.70 # 4.71 @@ -215,6 +232,14 @@ CONFIG_IP_NF_TARGET_ULOG=y 4.72 # CONFIG_DEV_APPLETALK is not set 4.73 # CONFIG_DECNET is not set 4.74 # CONFIG_BRIDGE is not set 4.75 +# CONFIG_X25 is not set 4.76 +# CONFIG_LAPB is not set 4.77 +# CONFIG_LLC is not set 4.78 +# CONFIG_NET_DIVERT is not set 4.79 +# CONFIG_ECONET is not set 4.80 +# CONFIG_WAN_ROUTER is not set 4.81 +# CONFIG_NET_FASTROUTE is not set 4.82 +# CONFIG_NET_HW_FLOWCONTROL is not set 4.83 4.84 # 4.85 # QoS and/or fair queueing 4.86 @@ -348,6 +373,7 @@ CONFIG_CHR_DEV_SG=y 4.87 CONFIG_SCSI_AHA152X=y 4.88 CONFIG_SCSI_AHA1542=y 4.89 CONFIG_SCSI_AHA1740=y 4.90 +CONFIG_SCSI_AACRAID=y 4.91 # CONFIG_SCSI_AIC7XXX is not set 4.92 CONFIG_SCSI_AIC79XX=y 4.93 CONFIG_AIC79XX_CMDS_PER_DEVICE=32 4.94 @@ -406,6 +432,7 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 4.95 # CONFIG_SCSI_U14_34F is not set 4.96 # CONFIG_SCSI_ULTRASTOR is not set 4.97 # CONFIG_SCSI_NSP32 is not set 4.98 +# CONFIG_SCSI_DEBUG is not set 4.99 4.100 # 4.101 # Fusion MPT device support 4.102 @@ -417,6 +444,11 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 4.103 # CONFIG_FUSION_LAN is not set 4.104 4.105 # 4.106 +# IEEE 1394 (FireWire) support (EXPERIMENTAL) 4.107 +# 4.108 +# CONFIG_IEEE1394 is not set 4.109 + 4.110 +# 4.111 # I2O device support 4.112 # 4.113 # CONFIG_I2O is not set 4.114 @@ -439,6 +471,7 @@ CONFIG_NETDEVICES=y 4.115 # CONFIG_BONDING is not set 4.116 # CONFIG_EQUALIZER is not set 4.117 # CONFIG_TUN is not set 4.118 +# CONFIG_ETHERTAP is not set 4.119 4.120 # 4.121 # Ethernet (10 or 100Mbit) 4.122 @@ -484,8 +517,9 @@ CONFIG_E1000=y 4.123 # CONFIG_YELLOWFIN is not set 4.124 # CONFIG_R8169 is not set 4.125 # CONFIG_SK98LIN is not set 4.126 -# CONFIG_TIGON3 is not set 4.127 
+CONFIG_TIGON3=y 4.128 # CONFIG_FDDI is not set 4.129 +# CONFIG_HIPPI is not set 4.130 # CONFIG_PLIP is not set 4.131 # CONFIG_PPP is not set 4.132 # CONFIG_SLIP is not set 4.133 @@ -500,6 +534,8 @@ CONFIG_E1000=y 4.134 # 4.135 # CONFIG_TR is not set 4.136 # CONFIG_NET_FC is not set 4.137 +# CONFIG_RCPCI is not set 4.138 +# CONFIG_SHAPER is not set 4.139 4.140 # 4.141 # Wan interfaces 4.142 @@ -599,6 +635,7 @@ CONFIG_PSMOUSE=y 4.143 # CONFIG_DTLK is not set 4.144 # CONFIG_R3964 is not set 4.145 # CONFIG_APPLICOM is not set 4.146 +# CONFIG_SONYPI is not set 4.147 4.148 # 4.149 # Ftape, the floppy tape device driver 4.150 @@ -778,6 +815,12 @@ CONFIG_XEN_CONSOLE=y 4.151 CONFIG_VGA_CONSOLE=y 4.152 CONFIG_DUMMY_CONSOLE=y 4.153 # CONFIG_VIDEO_SELECT is not set 4.154 +# CONFIG_MDA_CONSOLE is not set 4.155 + 4.156 +# 4.157 +# Frame-buffer support 4.158 +# 4.159 +# CONFIG_FB is not set 4.160 4.161 # 4.162 # Sound 4.163 @@ -811,6 +854,7 @@ CONFIG_USB_OHCI=y 4.164 # 4.165 # CONFIG_USB_AUDIO is not set 4.166 # CONFIG_USB_EMI26 is not set 4.167 +# CONFIG_USB_BLUETOOTH is not set 4.168 # CONFIG_USB_MIDI is not set 4.169 # CONFIG_USB_STORAGE is not set 4.170 # CONFIG_USB_STORAGE_DEBUG is not set 4.171 @@ -924,3 +968,4 @@ CONFIG_LOG_BUF_SHIFT=0 4.172 # CONFIG_CRC32 is not set 4.173 CONFIG_ZLIB_INFLATE=y 4.174 # CONFIG_ZLIB_DEFLATE is not set 4.175 +# CONFIG_FW_LOADER is not set
5.1 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c Sat Mar 27 13:51:01 2004 +0000 5.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c Sat Mar 27 15:29:57 2004 +0000 5.3 @@ -84,7 +84,7 @@ static int find_unbound_irq(void) 5.4 break; 5.5 5.6 if ( irq == NR_IRQS ) 5.7 - BUG(); 5.8 + panic("No available IRQ to bind to: increase NR_IRQS!\n"); 5.9 5.10 return irq; 5.11 } 5.12 @@ -101,7 +101,7 @@ int bind_virq_to_irq(int virq) 5.13 op.cmd = EVTCHNOP_bind_virq; 5.14 op.u.bind_virq.virq = virq; 5.15 if ( HYPERVISOR_event_channel_op(&op) != 0 ) 5.16 - BUG(); 5.17 + panic("Failed to bind virtual IRQ %d\n", virq); 5.18 evtchn = op.u.bind_virq.port; 5.19 5.20 irq = find_unbound_irq(); 5.21 @@ -132,7 +132,7 @@ void unbind_virq_from_irq(int virq) 5.22 op.u.close.dom = DOMID_SELF; 5.23 op.u.close.port = evtchn; 5.24 if ( HYPERVISOR_event_channel_op(&op) != 0 ) 5.25 - BUG(); 5.26 + panic("Failed to unbind virtual IRQ %d\n", virq); 5.27 5.28 evtchn_to_irq[evtchn] = -1; 5.29 irq_to_evtchn[irq] = -1; 5.30 @@ -241,8 +241,8 @@ static unsigned int startup_pirq(unsigne 5.31 op.cmd = EVTCHNOP_bind_pirq; 5.32 op.u.bind_pirq.pirq = irq; 5.33 if ( HYPERVISOR_event_channel_op(&op) != 0 ) 5.34 - BUG(); 5.35 - evtchn = op.u.bind_virq.port; 5.36 + panic("Failed to obtain physical IRQ %d\n", irq); 5.37 + evtchn = op.u.bind_pirq.port; 5.38 5.39 evtchn_to_irq[evtchn] = irq; 5.40 irq_to_evtchn[irq] = evtchn; 5.41 @@ -264,7 +264,7 @@ static void shutdown_pirq(unsigned int i 5.42 op.u.close.dom = DOMID_SELF; 5.43 op.u.close.port = evtchn; 5.44 if ( HYPERVISOR_event_channel_op(&op) != 0 ) 5.45 - BUG(); 5.46 + panic("Failed to unbind physical IRQ %d\n", irq); 5.47 5.48 evtchn_to_irq[evtchn] = -1; 5.49 irq_to_evtchn[irq] = -1;
6.1 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c Sat Mar 27 13:51:01 2004 +0000 6.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c Sat Mar 27 15:29:57 2004 +0000 6.3 @@ -549,6 +549,14 @@ asmlinkage void do_spurious_interrupt_bu 6.4 */ 6.5 asmlinkage void math_state_restore(struct pt_regs regs) 6.6 { 6.7 + /* 6.8 + * A trap in kernel mode can be ignored. It'll be the fast XOR or 6.9 + * copying libraries, which will correctly save/restore state and 6.10 + * reset the TS bit in CR0. 6.11 + */ 6.12 + if ( (regs.xcs & 2) == 0 ) 6.13 + return; 6.14 + 6.15 if (current->used_math) { 6.16 restore_fpu(current); 6.17 } else {
7.1 --- a/xenolinux-2.4.25-sparse/include/asm-xen/system.h Sat Mar 27 13:51:01 2004 +0000 7.2 +++ b/xenolinux-2.4.25-sparse/include/asm-xen/system.h Sat Mar 27 15:29:57 2004 +0000 7.3 @@ -107,33 +107,12 @@ static inline unsigned long _get_base(ch 7.4 ".previous" \ 7.5 : :"m" (*(unsigned int *)&(value))) 7.6 7.7 +/* NB. 'clts' is done for us by Xen during virtual trap. */ 7.8 #define clts() ((void)0) 7.9 -#define read_cr0() ({ \ 7.10 - unsigned int __dummy; \ 7.11 - __asm__( \ 7.12 - "movl %%cr0,%0\n\t" \ 7.13 - :"=r" (__dummy)); \ 7.14 - __dummy; \ 7.15 -}) 7.16 -#define write_cr0(x) \ 7.17 - __asm__("movl %0,%%cr0": :"r" (x)); 7.18 - 7.19 -#define read_cr4() ({ \ 7.20 - unsigned int __dummy; \ 7.21 - __asm__( \ 7.22 - "movl %%cr4,%0\n\t" \ 7.23 - :"=r" (__dummy)); \ 7.24 - __dummy; \ 7.25 -}) 7.26 -#define write_cr4(x) \ 7.27 - __asm__("movl %0,%%cr4": :"r" (x)); 7.28 #define stts() (HYPERVISOR_fpu_taskswitch()) 7.29 7.30 #endif /* __KERNEL__ */ 7.31 7.32 -#define wbinvd() \ 7.33 - __asm__ __volatile__ ("wbinvd": : :"memory"); 7.34 - 7.35 static inline unsigned long get_limit(unsigned long segment) 7.36 { 7.37 unsigned long __limit;
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/xenolinux-2.4.25-sparse/include/asm-xen/xor.h Sat Mar 27 15:29:57 2004 +0000 8.3 @@ -0,0 +1,879 @@ 8.4 +/* 8.5 + * include/asm-i386/xor.h 8.6 + * 8.7 + * Optimized RAID-5 checksumming functions for MMX and SSE. 8.8 + * 8.9 + * This program is free software; you can redistribute it and/or modify 8.10 + * it under the terms of the GNU General Public License as published by 8.11 + * the Free Software Foundation; either version 2, or (at your option) 8.12 + * any later version. 8.13 + * 8.14 + * You should have received a copy of the GNU General Public License 8.15 + * (for example /usr/src/linux/COPYING); if not, write to the Free 8.16 + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 8.17 + */ 8.18 + 8.19 +/* 8.20 + * High-speed RAID5 checksumming functions utilizing MMX instructions. 8.21 + * Copyright (C) 1998 Ingo Molnar. 8.22 + */ 8.23 + 8.24 +#define FPU_SAVE \ 8.25 + do { \ 8.26 + if (!(current->flags & PF_USEDFPU)) \ 8.27 + clts(); \ 8.28 + __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ 8.29 + } while (0) 8.30 + 8.31 +#define FPU_RESTORE \ 8.32 + do { \ 8.33 + __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ 8.34 + if (!(current->flags & PF_USEDFPU)) \ 8.35 + stts(); \ 8.36 + } while (0) 8.37 + 8.38 +#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n" 8.39 +#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n" 8.40 +#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" 8.41 +#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" 8.42 +#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" 8.43 +#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" 8.44 + 8.45 + 8.46 +static void 8.47 +xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 8.48 +{ 8.49 + unsigned long lines = bytes >> 7; 8.50 + char fpu_save[108]; 8.51 + 8.52 + FPU_SAVE; 8.53 + 8.54 + __asm__ __volatile__ ( 8.55 +#undef BLOCK 8.56 +#define BLOCK(i) \ 8.57 + LD(i,0) \ 8.58 + LD(i+1,1) \ 
8.59 + LD(i+2,2) \ 8.60 + LD(i+3,3) \ 8.61 + XO1(i,0) \ 8.62 + ST(i,0) \ 8.63 + XO1(i+1,1) \ 8.64 + ST(i+1,1) \ 8.65 + XO1(i+2,2) \ 8.66 + ST(i+2,2) \ 8.67 + XO1(i+3,3) \ 8.68 + ST(i+3,3) 8.69 + 8.70 + " .align 32 ;\n" 8.71 + " 1: ;\n" 8.72 + 8.73 + BLOCK(0) 8.74 + BLOCK(4) 8.75 + BLOCK(8) 8.76 + BLOCK(12) 8.77 + 8.78 + " addl $128, %1 ;\n" 8.79 + " addl $128, %2 ;\n" 8.80 + " decl %0 ;\n" 8.81 + " jnz 1b ;\n" 8.82 + : "+r" (lines), 8.83 + "+r" (p1), "+r" (p2) 8.84 + : 8.85 + : "memory"); 8.86 + 8.87 + FPU_RESTORE; 8.88 +} 8.89 + 8.90 +static void 8.91 +xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.92 + unsigned long *p3) 8.93 +{ 8.94 + unsigned long lines = bytes >> 7; 8.95 + char fpu_save[108]; 8.96 + 8.97 + FPU_SAVE; 8.98 + 8.99 + __asm__ __volatile__ ( 8.100 +#undef BLOCK 8.101 +#define BLOCK(i) \ 8.102 + LD(i,0) \ 8.103 + LD(i+1,1) \ 8.104 + LD(i+2,2) \ 8.105 + LD(i+3,3) \ 8.106 + XO1(i,0) \ 8.107 + XO1(i+1,1) \ 8.108 + XO1(i+2,2) \ 8.109 + XO1(i+3,3) \ 8.110 + XO2(i,0) \ 8.111 + ST(i,0) \ 8.112 + XO2(i+1,1) \ 8.113 + ST(i+1,1) \ 8.114 + XO2(i+2,2) \ 8.115 + ST(i+2,2) \ 8.116 + XO2(i+3,3) \ 8.117 + ST(i+3,3) 8.118 + 8.119 + " .align 32 ;\n" 8.120 + " 1: ;\n" 8.121 + 8.122 + BLOCK(0) 8.123 + BLOCK(4) 8.124 + BLOCK(8) 8.125 + BLOCK(12) 8.126 + 8.127 + " addl $128, %1 ;\n" 8.128 + " addl $128, %2 ;\n" 8.129 + " addl $128, %3 ;\n" 8.130 + " decl %0 ;\n" 8.131 + " jnz 1b ;\n" 8.132 + : "+r" (lines), 8.133 + "+r" (p1), "+r" (p2), "+r" (p3) 8.134 + : 8.135 + : "memory"); 8.136 + 8.137 + FPU_RESTORE; 8.138 +} 8.139 + 8.140 +static void 8.141 +xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.142 + unsigned long *p3, unsigned long *p4) 8.143 +{ 8.144 + unsigned long lines = bytes >> 7; 8.145 + char fpu_save[108]; 8.146 + 8.147 + FPU_SAVE; 8.148 + 8.149 + __asm__ __volatile__ ( 8.150 +#undef BLOCK 8.151 +#define BLOCK(i) \ 8.152 + LD(i,0) \ 8.153 + LD(i+1,1) \ 8.154 + LD(i+2,2) \ 8.155 + LD(i+3,3) \ 8.156 + XO1(i,0) 
\ 8.157 + XO1(i+1,1) \ 8.158 + XO1(i+2,2) \ 8.159 + XO1(i+3,3) \ 8.160 + XO2(i,0) \ 8.161 + XO2(i+1,1) \ 8.162 + XO2(i+2,2) \ 8.163 + XO2(i+3,3) \ 8.164 + XO3(i,0) \ 8.165 + ST(i,0) \ 8.166 + XO3(i+1,1) \ 8.167 + ST(i+1,1) \ 8.168 + XO3(i+2,2) \ 8.169 + ST(i+2,2) \ 8.170 + XO3(i+3,3) \ 8.171 + ST(i+3,3) 8.172 + 8.173 + " .align 32 ;\n" 8.174 + " 1: ;\n" 8.175 + 8.176 + BLOCK(0) 8.177 + BLOCK(4) 8.178 + BLOCK(8) 8.179 + BLOCK(12) 8.180 + 8.181 + " addl $128, %1 ;\n" 8.182 + " addl $128, %2 ;\n" 8.183 + " addl $128, %3 ;\n" 8.184 + " addl $128, %4 ;\n" 8.185 + " decl %0 ;\n" 8.186 + " jnz 1b ;\n" 8.187 + : "+r" (lines), 8.188 + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 8.189 + : 8.190 + : "memory"); 8.191 + 8.192 + FPU_RESTORE; 8.193 +} 8.194 + 8.195 + 8.196 +static void 8.197 +xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.198 + unsigned long *p3, unsigned long *p4, unsigned long *p5) 8.199 +{ 8.200 + unsigned long lines = bytes >> 7; 8.201 + char fpu_save[108]; 8.202 + 8.203 + FPU_SAVE; 8.204 + 8.205 + /* need to save/restore p4/p5 manually otherwise gcc's 10 argument 8.206 + limit gets exceeded (+ counts as two arguments) */ 8.207 + __asm__ __volatile__ ( 8.208 + " pushl %4\n" 8.209 + " pushl %5\n" 8.210 +#undef BLOCK 8.211 +#define BLOCK(i) \ 8.212 + LD(i,0) \ 8.213 + LD(i+1,1) \ 8.214 + LD(i+2,2) \ 8.215 + LD(i+3,3) \ 8.216 + XO1(i,0) \ 8.217 + XO1(i+1,1) \ 8.218 + XO1(i+2,2) \ 8.219 + XO1(i+3,3) \ 8.220 + XO2(i,0) \ 8.221 + XO2(i+1,1) \ 8.222 + XO2(i+2,2) \ 8.223 + XO2(i+3,3) \ 8.224 + XO3(i,0) \ 8.225 + XO3(i+1,1) \ 8.226 + XO3(i+2,2) \ 8.227 + XO3(i+3,3) \ 8.228 + XO4(i,0) \ 8.229 + ST(i,0) \ 8.230 + XO4(i+1,1) \ 8.231 + ST(i+1,1) \ 8.232 + XO4(i+2,2) \ 8.233 + ST(i+2,2) \ 8.234 + XO4(i+3,3) \ 8.235 + ST(i+3,3) 8.236 + 8.237 + " .align 32 ;\n" 8.238 + " 1: ;\n" 8.239 + 8.240 + BLOCK(0) 8.241 + BLOCK(4) 8.242 + BLOCK(8) 8.243 + BLOCK(12) 8.244 + 8.245 + " addl $128, %1 ;\n" 8.246 + " addl $128, %2 ;\n" 8.247 + " addl $128, %3 ;\n" 
8.248 + " addl $128, %4 ;\n" 8.249 + " addl $128, %5 ;\n" 8.250 + " decl %0 ;\n" 8.251 + " jnz 1b ;\n" 8.252 + " popl %5\n" 8.253 + " popl %4\n" 8.254 + : "+r" (lines), 8.255 + "+r" (p1), "+r" (p2), "+r" (p3) 8.256 + : "r" (p4), "r" (p5) 8.257 + : "memory"); 8.258 + 8.259 + FPU_RESTORE; 8.260 +} 8.261 + 8.262 +#undef LD 8.263 +#undef XO1 8.264 +#undef XO2 8.265 +#undef XO3 8.266 +#undef XO4 8.267 +#undef ST 8.268 +#undef BLOCK 8.269 + 8.270 +static void 8.271 +xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 8.272 +{ 8.273 + unsigned long lines = bytes >> 6; 8.274 + char fpu_save[108]; 8.275 + 8.276 + FPU_SAVE; 8.277 + 8.278 + __asm__ __volatile__ ( 8.279 + " .align 32 ;\n" 8.280 + " 1: ;\n" 8.281 + " movq (%1), %%mm0 ;\n" 8.282 + " movq 8(%1), %%mm1 ;\n" 8.283 + " pxor (%2), %%mm0 ;\n" 8.284 + " movq 16(%1), %%mm2 ;\n" 8.285 + " movq %%mm0, (%1) ;\n" 8.286 + " pxor 8(%2), %%mm1 ;\n" 8.287 + " movq 24(%1), %%mm3 ;\n" 8.288 + " movq %%mm1, 8(%1) ;\n" 8.289 + " pxor 16(%2), %%mm2 ;\n" 8.290 + " movq 32(%1), %%mm4 ;\n" 8.291 + " movq %%mm2, 16(%1) ;\n" 8.292 + " pxor 24(%2), %%mm3 ;\n" 8.293 + " movq 40(%1), %%mm5 ;\n" 8.294 + " movq %%mm3, 24(%1) ;\n" 8.295 + " pxor 32(%2), %%mm4 ;\n" 8.296 + " movq 48(%1), %%mm6 ;\n" 8.297 + " movq %%mm4, 32(%1) ;\n" 8.298 + " pxor 40(%2), %%mm5 ;\n" 8.299 + " movq 56(%1), %%mm7 ;\n" 8.300 + " movq %%mm5, 40(%1) ;\n" 8.301 + " pxor 48(%2), %%mm6 ;\n" 8.302 + " pxor 56(%2), %%mm7 ;\n" 8.303 + " movq %%mm6, 48(%1) ;\n" 8.304 + " movq %%mm7, 56(%1) ;\n" 8.305 + 8.306 + " addl $64, %1 ;\n" 8.307 + " addl $64, %2 ;\n" 8.308 + " decl %0 ;\n" 8.309 + " jnz 1b ;\n" 8.310 + : "+r" (lines), 8.311 + "+r" (p1), "+r" (p2) 8.312 + : 8.313 + : "memory"); 8.314 + 8.315 + FPU_RESTORE; 8.316 +} 8.317 + 8.318 +static void 8.319 +xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.320 + unsigned long *p3) 8.321 +{ 8.322 + unsigned long lines = bytes >> 6; 8.323 + char fpu_save[108]; 8.324 + 8.325 + FPU_SAVE; 
8.326 + 8.327 + __asm__ __volatile__ ( 8.328 + " .align 32,0x90 ;\n" 8.329 + " 1: ;\n" 8.330 + " movq (%1), %%mm0 ;\n" 8.331 + " movq 8(%1), %%mm1 ;\n" 8.332 + " pxor (%2), %%mm0 ;\n" 8.333 + " movq 16(%1), %%mm2 ;\n" 8.334 + " pxor 8(%2), %%mm1 ;\n" 8.335 + " pxor (%3), %%mm0 ;\n" 8.336 + " pxor 16(%2), %%mm2 ;\n" 8.337 + " movq %%mm0, (%1) ;\n" 8.338 + " pxor 8(%3), %%mm1 ;\n" 8.339 + " pxor 16(%3), %%mm2 ;\n" 8.340 + " movq 24(%1), %%mm3 ;\n" 8.341 + " movq %%mm1, 8(%1) ;\n" 8.342 + " movq 32(%1), %%mm4 ;\n" 8.343 + " movq 40(%1), %%mm5 ;\n" 8.344 + " pxor 24(%2), %%mm3 ;\n" 8.345 + " movq %%mm2, 16(%1) ;\n" 8.346 + " pxor 32(%2), %%mm4 ;\n" 8.347 + " pxor 24(%3), %%mm3 ;\n" 8.348 + " pxor 40(%2), %%mm5 ;\n" 8.349 + " movq %%mm3, 24(%1) ;\n" 8.350 + " pxor 32(%3), %%mm4 ;\n" 8.351 + " pxor 40(%3), %%mm5 ;\n" 8.352 + " movq 48(%1), %%mm6 ;\n" 8.353 + " movq %%mm4, 32(%1) ;\n" 8.354 + " movq 56(%1), %%mm7 ;\n" 8.355 + " pxor 48(%2), %%mm6 ;\n" 8.356 + " movq %%mm5, 40(%1) ;\n" 8.357 + " pxor 56(%2), %%mm7 ;\n" 8.358 + " pxor 48(%3), %%mm6 ;\n" 8.359 + " pxor 56(%3), %%mm7 ;\n" 8.360 + " movq %%mm6, 48(%1) ;\n" 8.361 + " movq %%mm7, 56(%1) ;\n" 8.362 + 8.363 + " addl $64, %1 ;\n" 8.364 + " addl $64, %2 ;\n" 8.365 + " addl $64, %3 ;\n" 8.366 + " decl %0 ;\n" 8.367 + " jnz 1b ;\n" 8.368 + : "+r" (lines), 8.369 + "+r" (p1), "+r" (p2), "+r" (p3) 8.370 + : 8.371 + : "memory" ); 8.372 + 8.373 + FPU_RESTORE; 8.374 +} 8.375 + 8.376 +static void 8.377 +xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.378 + unsigned long *p3, unsigned long *p4) 8.379 +{ 8.380 + unsigned long lines = bytes >> 6; 8.381 + char fpu_save[108]; 8.382 + 8.383 + FPU_SAVE; 8.384 + 8.385 + __asm__ __volatile__ ( 8.386 + " .align 32,0x90 ;\n" 8.387 + " 1: ;\n" 8.388 + " movq (%1), %%mm0 ;\n" 8.389 + " movq 8(%1), %%mm1 ;\n" 8.390 + " pxor (%2), %%mm0 ;\n" 8.391 + " movq 16(%1), %%mm2 ;\n" 8.392 + " pxor 8(%2), %%mm1 ;\n" 8.393 + " pxor (%3), %%mm0 ;\n" 8.394 + " pxor 16(%2), 
%%mm2 ;\n" 8.395 + " pxor 8(%3), %%mm1 ;\n" 8.396 + " pxor (%4), %%mm0 ;\n" 8.397 + " movq 24(%1), %%mm3 ;\n" 8.398 + " pxor 16(%3), %%mm2 ;\n" 8.399 + " pxor 8(%4), %%mm1 ;\n" 8.400 + " movq %%mm0, (%1) ;\n" 8.401 + " movq 32(%1), %%mm4 ;\n" 8.402 + " pxor 24(%2), %%mm3 ;\n" 8.403 + " pxor 16(%4), %%mm2 ;\n" 8.404 + " movq %%mm1, 8(%1) ;\n" 8.405 + " movq 40(%1), %%mm5 ;\n" 8.406 + " pxor 32(%2), %%mm4 ;\n" 8.407 + " pxor 24(%3), %%mm3 ;\n" 8.408 + " movq %%mm2, 16(%1) ;\n" 8.409 + " pxor 40(%2), %%mm5 ;\n" 8.410 + " pxor 32(%3), %%mm4 ;\n" 8.411 + " pxor 24(%4), %%mm3 ;\n" 8.412 + " movq %%mm3, 24(%1) ;\n" 8.413 + " movq 56(%1), %%mm7 ;\n" 8.414 + " movq 48(%1), %%mm6 ;\n" 8.415 + " pxor 40(%3), %%mm5 ;\n" 8.416 + " pxor 32(%4), %%mm4 ;\n" 8.417 + " pxor 48(%2), %%mm6 ;\n" 8.418 + " movq %%mm4, 32(%1) ;\n" 8.419 + " pxor 56(%2), %%mm7 ;\n" 8.420 + " pxor 40(%4), %%mm5 ;\n" 8.421 + " pxor 48(%3), %%mm6 ;\n" 8.422 + " pxor 56(%3), %%mm7 ;\n" 8.423 + " movq %%mm5, 40(%1) ;\n" 8.424 + " pxor 48(%4), %%mm6 ;\n" 8.425 + " pxor 56(%4), %%mm7 ;\n" 8.426 + " movq %%mm6, 48(%1) ;\n" 8.427 + " movq %%mm7, 56(%1) ;\n" 8.428 + 8.429 + " addl $64, %1 ;\n" 8.430 + " addl $64, %2 ;\n" 8.431 + " addl $64, %3 ;\n" 8.432 + " addl $64, %4 ;\n" 8.433 + " decl %0 ;\n" 8.434 + " jnz 1b ;\n" 8.435 + : "+r" (lines), 8.436 + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 8.437 + : 8.438 + : "memory"); 8.439 + 8.440 + FPU_RESTORE; 8.441 +} 8.442 + 8.443 +static void 8.444 +xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.445 + unsigned long *p3, unsigned long *p4, unsigned long *p5) 8.446 +{ 8.447 + unsigned long lines = bytes >> 6; 8.448 + char fpu_save[108]; 8.449 + 8.450 + FPU_SAVE; 8.451 + 8.452 + /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ 8.453 + __asm__ __volatile__ ( 8.454 + " pushl %4\n" 8.455 + " pushl %5\n" 8.456 + " .align 32,0x90 ;\n" 8.457 + " 1: ;\n" 8.458 + " movq (%1), %%mm0 ;\n" 8.459 + " movq 8(%1), %%mm1 ;\n" 8.460 + " 
pxor (%2), %%mm0 ;\n" 8.461 + " pxor 8(%2), %%mm1 ;\n" 8.462 + " movq 16(%1), %%mm2 ;\n" 8.463 + " pxor (%3), %%mm0 ;\n" 8.464 + " pxor 8(%3), %%mm1 ;\n" 8.465 + " pxor 16(%2), %%mm2 ;\n" 8.466 + " pxor (%4), %%mm0 ;\n" 8.467 + " pxor 8(%4), %%mm1 ;\n" 8.468 + " pxor 16(%3), %%mm2 ;\n" 8.469 + " movq 24(%1), %%mm3 ;\n" 8.470 + " pxor (%5), %%mm0 ;\n" 8.471 + " pxor 8(%5), %%mm1 ;\n" 8.472 + " movq %%mm0, (%1) ;\n" 8.473 + " pxor 16(%4), %%mm2 ;\n" 8.474 + " pxor 24(%2), %%mm3 ;\n" 8.475 + " movq %%mm1, 8(%1) ;\n" 8.476 + " pxor 16(%5), %%mm2 ;\n" 8.477 + " pxor 24(%3), %%mm3 ;\n" 8.478 + " movq 32(%1), %%mm4 ;\n" 8.479 + " movq %%mm2, 16(%1) ;\n" 8.480 + " pxor 24(%4), %%mm3 ;\n" 8.481 + " pxor 32(%2), %%mm4 ;\n" 8.482 + " movq 40(%1), %%mm5 ;\n" 8.483 + " pxor 24(%5), %%mm3 ;\n" 8.484 + " pxor 32(%3), %%mm4 ;\n" 8.485 + " pxor 40(%2), %%mm5 ;\n" 8.486 + " movq %%mm3, 24(%1) ;\n" 8.487 + " pxor 32(%4), %%mm4 ;\n" 8.488 + " pxor 40(%3), %%mm5 ;\n" 8.489 + " movq 48(%1), %%mm6 ;\n" 8.490 + " movq 56(%1), %%mm7 ;\n" 8.491 + " pxor 32(%5), %%mm4 ;\n" 8.492 + " pxor 40(%4), %%mm5 ;\n" 8.493 + " pxor 48(%2), %%mm6 ;\n" 8.494 + " pxor 56(%2), %%mm7 ;\n" 8.495 + " movq %%mm4, 32(%1) ;\n" 8.496 + " pxor 48(%3), %%mm6 ;\n" 8.497 + " pxor 56(%3), %%mm7 ;\n" 8.498 + " pxor 40(%5), %%mm5 ;\n" 8.499 + " pxor 48(%4), %%mm6 ;\n" 8.500 + " pxor 56(%4), %%mm7 ;\n" 8.501 + " movq %%mm5, 40(%1) ;\n" 8.502 + " pxor 48(%5), %%mm6 ;\n" 8.503 + " pxor 56(%5), %%mm7 ;\n" 8.504 + " movq %%mm6, 48(%1) ;\n" 8.505 + " movq %%mm7, 56(%1) ;\n" 8.506 + 8.507 + " addl $64, %1 ;\n" 8.508 + " addl $64, %2 ;\n" 8.509 + " addl $64, %3 ;\n" 8.510 + " addl $64, %4 ;\n" 8.511 + " addl $64, %5 ;\n" 8.512 + " decl %0 ;\n" 8.513 + " jnz 1b ;\n" 8.514 + " popl %5\n" 8.515 + " popl %4\n" 8.516 + : "+g" (lines), 8.517 + "+r" (p1), "+r" (p2), "+r" (p3) 8.518 + : "r" (p4), "r" (p5) 8.519 + : "memory"); 8.520 + 8.521 + FPU_RESTORE; 8.522 +} 8.523 + 8.524 +static struct xor_block_template xor_block_pII_mmx = { 
8.525 + name: "pII_mmx", 8.526 + do_2: xor_pII_mmx_2, 8.527 + do_3: xor_pII_mmx_3, 8.528 + do_4: xor_pII_mmx_4, 8.529 + do_5: xor_pII_mmx_5, 8.530 +}; 8.531 + 8.532 +static struct xor_block_template xor_block_p5_mmx = { 8.533 + name: "p5_mmx", 8.534 + do_2: xor_p5_mmx_2, 8.535 + do_3: xor_p5_mmx_3, 8.536 + do_4: xor_p5_mmx_4, 8.537 + do_5: xor_p5_mmx_5, 8.538 +}; 8.539 + 8.540 +#undef FPU_SAVE 8.541 +#undef FPU_RESTORE 8.542 + 8.543 +/* 8.544 + * Cache avoiding checksumming functions utilizing KNI instructions 8.545 + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) 8.546 + */ 8.547 + 8.548 +#define XMMS_SAVE \ 8.549 + if (!(current->flags & PF_USEDFPU)) \ 8.550 + clts(); \ 8.551 + __asm__ __volatile__ ( \ 8.552 + "movups %%xmm0,(%1) ;\n\t" \ 8.553 + "movups %%xmm1,0x10(%1) ;\n\t" \ 8.554 + "movups %%xmm2,0x20(%1) ;\n\t" \ 8.555 + "movups %%xmm3,0x30(%1) ;\n\t" \ 8.556 + : "=&r" (cr0) \ 8.557 + : "r" (xmm_save) \ 8.558 + : "memory") 8.559 + 8.560 +#define XMMS_RESTORE \ 8.561 + __asm__ __volatile__ ( \ 8.562 + "sfence ;\n\t" \ 8.563 + "movups (%1),%%xmm0 ;\n\t" \ 8.564 + "movups 0x10(%1),%%xmm1 ;\n\t" \ 8.565 + "movups 0x20(%1),%%xmm2 ;\n\t" \ 8.566 + "movups 0x30(%1),%%xmm3 ;\n\t" \ 8.567 + : \ 8.568 + : "r" (cr0), "r" (xmm_save) \ 8.569 + : "memory"); \ 8.570 + if (!(current->flags & PF_USEDFPU)) \ 8.571 + stts() 8.572 + 8.573 +#define ALIGN16 __attribute__((aligned(16))) 8.574 + 8.575 +#define OFFS(x) "16*("#x")" 8.576 +#define PF_OFFS(x) "256+16*("#x")" 8.577 +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" 8.578 +#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" 8.579 +#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" 8.580 +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" 8.581 +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" 8.582 +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" 8.583 +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" 8.584 +#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" 8.585 +#define XO1(x,y) " xorps 
"OFFS(x)"(%2), %%xmm"#y" ;\n" 8.586 +#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" 8.587 +#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" 8.588 +#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" 8.589 +#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" 8.590 + 8.591 + 8.592 +static void 8.593 +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 8.594 +{ 8.595 + unsigned long lines = bytes >> 8; 8.596 + char xmm_save[16*4] ALIGN16; 8.597 + int cr0; 8.598 + 8.599 + XMMS_SAVE; 8.600 + 8.601 + __asm__ __volatile__ ( 8.602 +#undef BLOCK 8.603 +#define BLOCK(i) \ 8.604 + LD(i,0) \ 8.605 + LD(i+1,1) \ 8.606 + PF1(i) \ 8.607 + PF1(i+2) \ 8.608 + LD(i+2,2) \ 8.609 + LD(i+3,3) \ 8.610 + PF0(i+4) \ 8.611 + PF0(i+6) \ 8.612 + XO1(i,0) \ 8.613 + XO1(i+1,1) \ 8.614 + XO1(i+2,2) \ 8.615 + XO1(i+3,3) \ 8.616 + ST(i,0) \ 8.617 + ST(i+1,1) \ 8.618 + ST(i+2,2) \ 8.619 + ST(i+3,3) \ 8.620 + 8.621 + 8.622 + PF0(0) 8.623 + PF0(2) 8.624 + 8.625 + " .align 32 ;\n" 8.626 + " 1: ;\n" 8.627 + 8.628 + BLOCK(0) 8.629 + BLOCK(4) 8.630 + BLOCK(8) 8.631 + BLOCK(12) 8.632 + 8.633 + " addl $256, %1 ;\n" 8.634 + " addl $256, %2 ;\n" 8.635 + " decl %0 ;\n" 8.636 + " jnz 1b ;\n" 8.637 + : "+r" (lines), 8.638 + "+r" (p1), "+r" (p2) 8.639 + : 8.640 + : "memory"); 8.641 + 8.642 + XMMS_RESTORE; 8.643 +} 8.644 + 8.645 +static void 8.646 +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.647 + unsigned long *p3) 8.648 +{ 8.649 + unsigned long lines = bytes >> 8; 8.650 + char xmm_save[16*4] ALIGN16; 8.651 + int cr0; 8.652 + 8.653 + XMMS_SAVE; 8.654 + 8.655 + __asm__ __volatile__ ( 8.656 +#undef BLOCK 8.657 +#define BLOCK(i) \ 8.658 + PF1(i) \ 8.659 + PF1(i+2) \ 8.660 + LD(i,0) \ 8.661 + LD(i+1,1) \ 8.662 + LD(i+2,2) \ 8.663 + LD(i+3,3) \ 8.664 + PF2(i) \ 8.665 + PF2(i+2) \ 8.666 + PF0(i+4) \ 8.667 + PF0(i+6) \ 8.668 + XO1(i,0) \ 8.669 + XO1(i+1,1) \ 8.670 + XO1(i+2,2) \ 8.671 + XO1(i+3,3) \ 8.672 + XO2(i,0) \ 8.673 + XO2(i+1,1) \ 8.674 + 
XO2(i+2,2) \ 8.675 + XO2(i+3,3) \ 8.676 + ST(i,0) \ 8.677 + ST(i+1,1) \ 8.678 + ST(i+2,2) \ 8.679 + ST(i+3,3) \ 8.680 + 8.681 + 8.682 + PF0(0) 8.683 + PF0(2) 8.684 + 8.685 + " .align 32 ;\n" 8.686 + " 1: ;\n" 8.687 + 8.688 + BLOCK(0) 8.689 + BLOCK(4) 8.690 + BLOCK(8) 8.691 + BLOCK(12) 8.692 + 8.693 + " addl $256, %1 ;\n" 8.694 + " addl $256, %2 ;\n" 8.695 + " addl $256, %3 ;\n" 8.696 + " decl %0 ;\n" 8.697 + " jnz 1b ;\n" 8.698 + : "+r" (lines), 8.699 + "+r" (p1), "+r"(p2), "+r"(p3) 8.700 + : 8.701 + : "memory" ); 8.702 + 8.703 + XMMS_RESTORE; 8.704 +} 8.705 + 8.706 +static void 8.707 +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.708 + unsigned long *p3, unsigned long *p4) 8.709 +{ 8.710 + unsigned long lines = bytes >> 8; 8.711 + char xmm_save[16*4] ALIGN16; 8.712 + int cr0; 8.713 + 8.714 + XMMS_SAVE; 8.715 + 8.716 + __asm__ __volatile__ ( 8.717 +#undef BLOCK 8.718 +#define BLOCK(i) \ 8.719 + PF1(i) \ 8.720 + PF1(i+2) \ 8.721 + LD(i,0) \ 8.722 + LD(i+1,1) \ 8.723 + LD(i+2,2) \ 8.724 + LD(i+3,3) \ 8.725 + PF2(i) \ 8.726 + PF2(i+2) \ 8.727 + XO1(i,0) \ 8.728 + XO1(i+1,1) \ 8.729 + XO1(i+2,2) \ 8.730 + XO1(i+3,3) \ 8.731 + PF3(i) \ 8.732 + PF3(i+2) \ 8.733 + PF0(i+4) \ 8.734 + PF0(i+6) \ 8.735 + XO2(i,0) \ 8.736 + XO2(i+1,1) \ 8.737 + XO2(i+2,2) \ 8.738 + XO2(i+3,3) \ 8.739 + XO3(i,0) \ 8.740 + XO3(i+1,1) \ 8.741 + XO3(i+2,2) \ 8.742 + XO3(i+3,3) \ 8.743 + ST(i,0) \ 8.744 + ST(i+1,1) \ 8.745 + ST(i+2,2) \ 8.746 + ST(i+3,3) \ 8.747 + 8.748 + 8.749 + PF0(0) 8.750 + PF0(2) 8.751 + 8.752 + " .align 32 ;\n" 8.753 + " 1: ;\n" 8.754 + 8.755 + BLOCK(0) 8.756 + BLOCK(4) 8.757 + BLOCK(8) 8.758 + BLOCK(12) 8.759 + 8.760 + " addl $256, %1 ;\n" 8.761 + " addl $256, %2 ;\n" 8.762 + " addl $256, %3 ;\n" 8.763 + " addl $256, %4 ;\n" 8.764 + " decl %0 ;\n" 8.765 + " jnz 1b ;\n" 8.766 + : "+r" (lines), 8.767 + "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) 8.768 + : 8.769 + : "memory" ); 8.770 + 8.771 + XMMS_RESTORE; 8.772 +} 8.773 + 8.774 +static void 8.775 
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, 8.776 + unsigned long *p3, unsigned long *p4, unsigned long *p5) 8.777 +{ 8.778 + unsigned long lines = bytes >> 8; 8.779 + char xmm_save[16*4] ALIGN16; 8.780 + int cr0; 8.781 + 8.782 + XMMS_SAVE; 8.783 + 8.784 + /* need to save p4/p5 manually to not exceed gcc's 10 argument limit */ 8.785 + __asm__ __volatile__ ( 8.786 + " pushl %4\n" 8.787 + " pushl %5\n" 8.788 +#undef BLOCK 8.789 +#define BLOCK(i) \ 8.790 + PF1(i) \ 8.791 + PF1(i+2) \ 8.792 + LD(i,0) \ 8.793 + LD(i+1,1) \ 8.794 + LD(i+2,2) \ 8.795 + LD(i+3,3) \ 8.796 + PF2(i) \ 8.797 + PF2(i+2) \ 8.798 + XO1(i,0) \ 8.799 + XO1(i+1,1) \ 8.800 + XO1(i+2,2) \ 8.801 + XO1(i+3,3) \ 8.802 + PF3(i) \ 8.803 + PF3(i+2) \ 8.804 + XO2(i,0) \ 8.805 + XO2(i+1,1) \ 8.806 + XO2(i+2,2) \ 8.807 + XO2(i+3,3) \ 8.808 + PF4(i) \ 8.809 + PF4(i+2) \ 8.810 + PF0(i+4) \ 8.811 + PF0(i+6) \ 8.812 + XO3(i,0) \ 8.813 + XO3(i+1,1) \ 8.814 + XO3(i+2,2) \ 8.815 + XO3(i+3,3) \ 8.816 + XO4(i,0) \ 8.817 + XO4(i+1,1) \ 8.818 + XO4(i+2,2) \ 8.819 + XO4(i+3,3) \ 8.820 + ST(i,0) \ 8.821 + ST(i+1,1) \ 8.822 + ST(i+2,2) \ 8.823 + ST(i+3,3) \ 8.824 + 8.825 + 8.826 + PF0(0) 8.827 + PF0(2) 8.828 + 8.829 + " .align 32 ;\n" 8.830 + " 1: ;\n" 8.831 + 8.832 + BLOCK(0) 8.833 + BLOCK(4) 8.834 + BLOCK(8) 8.835 + BLOCK(12) 8.836 + 8.837 + " addl $256, %1 ;\n" 8.838 + " addl $256, %2 ;\n" 8.839 + " addl $256, %3 ;\n" 8.840 + " addl $256, %4 ;\n" 8.841 + " addl $256, %5 ;\n" 8.842 + " decl %0 ;\n" 8.843 + " jnz 1b ;\n" 8.844 + " popl %5\n" 8.845 + " popl %4\n" 8.846 + : "+r" (lines), 8.847 + "+r" (p1), "+r" (p2), "+r" (p3) 8.848 + : "r" (p4), "r" (p5) 8.849 + : "memory"); 8.850 + 8.851 + XMMS_RESTORE; 8.852 +} 8.853 + 8.854 +static struct xor_block_template xor_block_pIII_sse = { 8.855 + name: "pIII_sse", 8.856 + do_2: xor_sse_2, 8.857 + do_3: xor_sse_3, 8.858 + do_4: xor_sse_4, 8.859 + do_5: xor_sse_5, 8.860 +}; 8.861 + 8.862 +/* Also try the generic routines. 
*/ 8.863 +#include <asm-generic/xor.h> 8.864 + 8.865 +#undef XOR_TRY_TEMPLATES 8.866 +#define XOR_TRY_TEMPLATES \ 8.867 + do { \ 8.868 + xor_speed(&xor_block_8regs); \ 8.869 + xor_speed(&xor_block_32regs); \ 8.870 + if (cpu_has_xmm) \ 8.871 + xor_speed(&xor_block_pIII_sse); \ 8.872 + if (md_cpu_has_mmx()) { \ 8.873 + xor_speed(&xor_block_pII_mmx); \ 8.874 + xor_speed(&xor_block_p5_mmx); \ 8.875 + } \ 8.876 + } while (0) 8.877 + 8.878 +/* We force the use of the SSE xor block because it can write around L2. 8.879 + We may also be able to load into the L1 only depending on how the cpu 8.880 + deals with a load to a line that is being prefetched. */ 8.881 +#define XOR_SELECT_TEMPLATE(FASTEST) \ 8.882 + (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
9.1 --- a/xenolinux-2.4.25-sparse/mkbuildtree Sat Mar 27 13:51:01 2004 +0000 9.2 +++ b/xenolinux-2.4.25-sparse/mkbuildtree Sat Mar 27 15:29:57 2004 +0000 9.3 @@ -196,7 +196,6 @@ ln -sf ../asm-i386/ucontext.h 9.4 ln -sf ../asm-i386/unaligned.h 9.5 ln -sf ../asm-i386/unistd.h 9.6 ln -sf ../asm-i386/user.h 9.7 -ln -sf ../asm-i386/xor.h 9.8 9.9 cd ../../arch/xen/kernel 9.10 ln -sf ../../i386/kernel/i387.c