ia64/xen-unstable

changeset 1251:76ba37dc1b89

bitkeeper revision 1.825.3.12 (40659df50NIJI5Ld3gK593_2UZJDuQ)

xor.h:
new file
Many files:
Bug fixes for hardware virtualisation.
author kaf24@scramble.cl.cam.ac.uk
date Sat Mar 27 15:29:57 2004 +0000 (2004-03-27)
parents b2c2c29b1e99
children 771b47a23e3d
files .rootkeys xen/arch/i386/irq.c xen/common/event_channel.c xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c xenolinux-2.4.25-sparse/include/asm-xen/system.h xenolinux-2.4.25-sparse/include/asm-xen/xor.h xenolinux-2.4.25-sparse/mkbuildtree
line diff
     1.1 --- a/.rootkeys	Sat Mar 27 13:51:01 2004 +0000
     1.2 +++ b/.rootkeys	Sat Mar 27 15:29:57 2004 +0000
     1.3 @@ -689,6 +689,7 @@ 4062f7e2PzFOUGT0PaE7A0VprTU3JQ xenolinux
     1.4  3e5a4e68mTr0zcp9SXDbnd-XLrrfxw xenolinux-2.4.25-sparse/include/asm-xen/system.h
     1.5  3f1056a9L_kqHcFheV00KbKBzv9j5w xenolinux-2.4.25-sparse/include/asm-xen/vga.h
     1.6  3f689063nhrIRsMMZjZxMFk7iEINqQ xenolinux-2.4.25-sparse/include/asm-xen/xen_proc.h
     1.7 +40659defgWA92arexpMGn8X3QMDj3w xenolinux-2.4.25-sparse/include/asm-xen/xor.h
     1.8  3f056927gMHl7mWB89rb73JahbhQIA xenolinux-2.4.25-sparse/include/linux/blk.h
     1.9  3e5a4e68WLX3B8owTvktP3HHOtznPQ xenolinux-2.4.25-sparse/include/linux/major.h
    1.10  401c0590D_kwJDU59X8NyvqSv_Cl2A xenolinux-2.4.25-sparse/include/linux/sched.h
     2.1 --- a/xen/arch/i386/irq.c	Sat Mar 27 13:51:01 2004 +0000
     2.2 +++ b/xen/arch/i386/irq.c	Sat Mar 27 15:29:57 2004 +0000
     2.3 @@ -1005,12 +1005,19 @@ int pirq_guest_bind(struct task_struct *
     2.4      {
     2.5          rc = -EBUSY;
     2.6          if ( desc->action != NULL )
     2.7 +        {
     2.8 +            DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n",
     2.9 +                    irq, desc->action->name);
    2.10              goto out;
    2.11 +        }
    2.12  
    2.13          rc = -ENOMEM;
    2.14          action = kmalloc(sizeof(irq_guest_action_t), GFP_KERNEL);
    2.15          if ( (desc->action = (struct irqaction *)action) == NULL )
    2.16 +        {
    2.17 +            DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq);
    2.18              goto out;
    2.19 +        }
    2.20  
    2.21          action->nr_guests = 0;
    2.22          action->in_flight = 0;
    2.23 @@ -1025,10 +1032,15 @@ int pirq_guest_bind(struct task_struct *
    2.24  
    2.25      rc = -EBUSY;
    2.26      if ( action->nr_guests == IRQ_MAX_GUESTS )
    2.27 +    {
    2.28 +        DPRINTK("Cannot bind IRQ %d to guest. Already at max share.\n", irq);
    2.29          goto out;
    2.30 +    }
    2.31  
    2.32      action->guest[action->nr_guests++] = p;
    2.33  
    2.34 +    rc = 0;
    2.35 +
    2.36   out:
    2.37      spin_unlock_irqrestore(&desc->lock, flags);
    2.38      return rc;
     3.1 --- a/xen/common/event_channel.c	Sat Mar 27 13:51:01 2004 +0000
     3.2 +++ b/xen/common/event_channel.c	Sat Mar 27 15:29:57 2004 +0000
     3.3 @@ -197,6 +197,7 @@ static long evtchn_bind_pirq(evtchn_bind
     3.4      if ( (rc = pirq_guest_bind(p, pirq)) != 0 )
     3.5      {
     3.6          p->pirq_to_evtchn[pirq] = 0;
     3.7 +        DPRINTK("Couldn't bind to PIRQ %d (error=%d)\n", pirq, rc);
     3.8          goto out;
     3.9      }
    3.10  
     4.1 --- a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev	Sat Mar 27 13:51:01 2004 +0000
     4.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev	Sat Mar 27 15:29:57 2004 +0000
     4.3 @@ -17,7 +17,7 @@ CONFIG_NO_IDLE_HZ=y
     4.4  #
     4.5  # Code maturity level options
     4.6  #
     4.7 -# CONFIG_EXPERIMENTAL is not set
     4.8 +CONFIG_EXPERIMENTAL=y
     4.9  
    4.10  #
    4.11  # Loadable module support
    4.12 @@ -88,6 +88,8 @@ CONFIG_BINFMT_ELF=y
    4.13  #
    4.14  CONFIG_PARPORT=y
    4.15  CONFIG_PARPORT_PC=y
    4.16 +# CONFIG_PARPORT_PC_FIFO is not set
    4.17 +# CONFIG_PARPORT_PC_SUPERIO is not set
    4.18  # CONFIG_PARPORT_PC_PCMCIA is not set
    4.19  # CONFIG_PARPORT_AMIGA is not set
    4.20  # CONFIG_PARPORT_MFC3 is not set
    4.21 @@ -153,6 +155,7 @@ CONFIG_IP_PNP_DHCP=y
    4.22  # CONFIG_IP_PNP_RARP is not set
    4.23  # CONFIG_NET_IPIP is not set
    4.24  # CONFIG_NET_IPGRE is not set
    4.25 +# CONFIG_ARPD is not set
    4.26  # CONFIG_INET_ECN is not set
    4.27  # CONFIG_SYN_COOKIES is not set
    4.28  
    4.29 @@ -164,6 +167,7 @@ CONFIG_IP_NF_FTP=y
    4.30  # CONFIG_IP_NF_AMANDA is not set
    4.31  CONFIG_IP_NF_TFTP=y
    4.32  CONFIG_IP_NF_IRC=y
    4.33 +# CONFIG_IP_NF_QUEUE is not set
    4.34  CONFIG_IP_NF_IPTABLES=y
    4.35  # CONFIG_IP_NF_MATCH_LIMIT is not set
    4.36  # CONFIG_IP_NF_MATCH_MAC is not set
    4.37 @@ -181,13 +185,17 @@ CONFIG_IP_NF_IPTABLES=y
    4.38  # CONFIG_IP_NF_MATCH_HELPER is not set
    4.39  CONFIG_IP_NF_MATCH_STATE=y
    4.40  CONFIG_IP_NF_MATCH_CONNTRACK=y
    4.41 +# CONFIG_IP_NF_MATCH_UNCLEAN is not set
    4.42 +# CONFIG_IP_NF_MATCH_OWNER is not set
    4.43  CONFIG_IP_NF_FILTER=y
    4.44  CONFIG_IP_NF_TARGET_REJECT=y
    4.45 +# CONFIG_IP_NF_TARGET_MIRROR is not set
    4.46  CONFIG_IP_NF_NAT=y
    4.47  CONFIG_IP_NF_NAT_NEEDED=y
    4.48  CONFIG_IP_NF_TARGET_MASQUERADE=y
    4.49  CONFIG_IP_NF_TARGET_REDIRECT=y
    4.50  # CONFIG_IP_NF_NAT_LOCAL is not set
    4.51 +# CONFIG_IP_NF_NAT_SNMP_BASIC is not set
    4.52  CONFIG_IP_NF_NAT_IRC=y
    4.53  CONFIG_IP_NF_NAT_FTP=y
    4.54  CONFIG_IP_NF_NAT_TFTP=y
    4.55 @@ -201,6 +209,15 @@ CONFIG_IP_NF_TARGET_ULOG=y
    4.56  #   IP: Virtual Server Configuration
    4.57  #
    4.58  # CONFIG_IP_VS is not set
    4.59 +# CONFIG_IPV6 is not set
    4.60 +# CONFIG_KHTTPD is not set
    4.61 +
    4.62 +#
    4.63 +#    SCTP Configuration (EXPERIMENTAL)
    4.64 +#
    4.65 +CONFIG_IPV6_SCTP__=y
    4.66 +# CONFIG_IP_SCTP is not set
    4.67 +# CONFIG_ATM is not set
    4.68  # CONFIG_VLAN_8021Q is not set
    4.69  
    4.70  #
    4.71 @@ -215,6 +232,14 @@ CONFIG_IP_NF_TARGET_ULOG=y
    4.72  # CONFIG_DEV_APPLETALK is not set
    4.73  # CONFIG_DECNET is not set
    4.74  # CONFIG_BRIDGE is not set
    4.75 +# CONFIG_X25 is not set
    4.76 +# CONFIG_LAPB is not set
    4.77 +# CONFIG_LLC is not set
    4.78 +# CONFIG_NET_DIVERT is not set
    4.79 +# CONFIG_ECONET is not set
    4.80 +# CONFIG_WAN_ROUTER is not set
    4.81 +# CONFIG_NET_FASTROUTE is not set
    4.82 +# CONFIG_NET_HW_FLOWCONTROL is not set
    4.83  
    4.84  #
    4.85  # QoS and/or fair queueing
    4.86 @@ -348,6 +373,7 @@ CONFIG_CHR_DEV_SG=y
    4.87  CONFIG_SCSI_AHA152X=y
    4.88  CONFIG_SCSI_AHA1542=y
    4.89  CONFIG_SCSI_AHA1740=y
    4.90 +CONFIG_SCSI_AACRAID=y
    4.91  # CONFIG_SCSI_AIC7XXX is not set
    4.92  CONFIG_SCSI_AIC79XX=y
    4.93  CONFIG_AIC79XX_CMDS_PER_DEVICE=32
    4.94 @@ -406,6 +432,7 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
    4.95  # CONFIG_SCSI_U14_34F is not set
    4.96  # CONFIG_SCSI_ULTRASTOR is not set
    4.97  # CONFIG_SCSI_NSP32 is not set
    4.98 +# CONFIG_SCSI_DEBUG is not set
    4.99  
   4.100  #
   4.101  # Fusion MPT device support
   4.102 @@ -417,6 +444,11 @@ CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
   4.103  # CONFIG_FUSION_LAN is not set
   4.104  
   4.105  #
   4.106 +# IEEE 1394 (FireWire) support (EXPERIMENTAL)
   4.107 +#
   4.108 +# CONFIG_IEEE1394 is not set
   4.109 +
   4.110 +#
   4.111  # I2O device support
   4.112  #
   4.113  # CONFIG_I2O is not set
   4.114 @@ -439,6 +471,7 @@ CONFIG_NETDEVICES=y
   4.115  # CONFIG_BONDING is not set
   4.116  # CONFIG_EQUALIZER is not set
   4.117  # CONFIG_TUN is not set
   4.118 +# CONFIG_ETHERTAP is not set
   4.119  
   4.120  #
   4.121  # Ethernet (10 or 100Mbit)
   4.122 @@ -484,8 +517,9 @@ CONFIG_E1000=y
   4.123  # CONFIG_YELLOWFIN is not set
   4.124  # CONFIG_R8169 is not set
   4.125  # CONFIG_SK98LIN is not set
   4.126 -# CONFIG_TIGON3 is not set
   4.127 +CONFIG_TIGON3=y
   4.128  # CONFIG_FDDI is not set
   4.129 +# CONFIG_HIPPI is not set
   4.130  # CONFIG_PLIP is not set
   4.131  # CONFIG_PPP is not set
   4.132  # CONFIG_SLIP is not set
   4.133 @@ -500,6 +534,8 @@ CONFIG_E1000=y
   4.134  #
   4.135  # CONFIG_TR is not set
   4.136  # CONFIG_NET_FC is not set
   4.137 +# CONFIG_RCPCI is not set
   4.138 +# CONFIG_SHAPER is not set
   4.139  
   4.140  #
   4.141  # Wan interfaces
   4.142 @@ -599,6 +635,7 @@ CONFIG_PSMOUSE=y
   4.143  # CONFIG_DTLK is not set
   4.144  # CONFIG_R3964 is not set
   4.145  # CONFIG_APPLICOM is not set
   4.146 +# CONFIG_SONYPI is not set
   4.147  
   4.148  #
   4.149  # Ftape, the floppy tape device driver
   4.150 @@ -778,6 +815,12 @@ CONFIG_XEN_CONSOLE=y
   4.151  CONFIG_VGA_CONSOLE=y
   4.152  CONFIG_DUMMY_CONSOLE=y
   4.153  # CONFIG_VIDEO_SELECT is not set
   4.154 +# CONFIG_MDA_CONSOLE is not set
   4.155 +
   4.156 +#
   4.157 +# Frame-buffer support
   4.158 +#
   4.159 +# CONFIG_FB is not set
   4.160  
   4.161  #
   4.162  # Sound
   4.163 @@ -811,6 +854,7 @@ CONFIG_USB_OHCI=y
   4.164  #
   4.165  # CONFIG_USB_AUDIO is not set
   4.166  # CONFIG_USB_EMI26 is not set
   4.167 +# CONFIG_USB_BLUETOOTH is not set
   4.168  # CONFIG_USB_MIDI is not set
   4.169  # CONFIG_USB_STORAGE is not set
   4.170  # CONFIG_USB_STORAGE_DEBUG is not set
   4.171 @@ -924,3 +968,4 @@ CONFIG_LOG_BUF_SHIFT=0
   4.172  # CONFIG_CRC32 is not set
   4.173  CONFIG_ZLIB_INFLATE=y
   4.174  # CONFIG_ZLIB_DEFLATE is not set
   4.175 +# CONFIG_FW_LOADER is not set
     5.1 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c	Sat Mar 27 13:51:01 2004 +0000
     5.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c	Sat Mar 27 15:29:57 2004 +0000
     5.3 @@ -84,7 +84,7 @@ static int find_unbound_irq(void)
     5.4              break;
     5.5  
     5.6      if ( irq == NR_IRQS )
     5.7 -        BUG();
     5.8 +        panic("No available IRQ to bind to: increase NR_IRQS!\n");
     5.9  
    5.10      return irq;
    5.11  }
    5.12 @@ -101,7 +101,7 @@ int bind_virq_to_irq(int virq)
    5.13          op.cmd              = EVTCHNOP_bind_virq;
    5.14          op.u.bind_virq.virq = virq;
    5.15          if ( HYPERVISOR_event_channel_op(&op) != 0 )
    5.16 -            BUG();
    5.17 +            panic("Failed to bind virtual IRQ %d\n", virq);
    5.18          evtchn = op.u.bind_virq.port;
    5.19  
    5.20          irq = find_unbound_irq();
    5.21 @@ -132,7 +132,7 @@ void unbind_virq_from_irq(int virq)
    5.22          op.u.close.dom  = DOMID_SELF;
    5.23          op.u.close.port = evtchn;
    5.24          if ( HYPERVISOR_event_channel_op(&op) != 0 )
    5.25 -            BUG();
    5.26 +            panic("Failed to unbind virtual IRQ %d\n", virq);
    5.27  
    5.28          evtchn_to_irq[evtchn] = -1;
    5.29          irq_to_evtchn[irq]    = -1;
    5.30 @@ -241,8 +241,8 @@ static unsigned int startup_pirq(unsigne
    5.31      op.cmd              = EVTCHNOP_bind_pirq;
    5.32      op.u.bind_pirq.pirq = irq;
    5.33      if ( HYPERVISOR_event_channel_op(&op) != 0 )
    5.34 -        BUG();
    5.35 -    evtchn = op.u.bind_virq.port;
    5.36 +        panic("Failed to obtain physical IRQ %d\n", irq);
    5.37 +    evtchn = op.u.bind_pirq.port;
    5.38  
    5.39      evtchn_to_irq[evtchn] = irq;
    5.40      irq_to_evtchn[irq]    = evtchn;
    5.41 @@ -264,7 +264,7 @@ static void shutdown_pirq(unsigned int i
    5.42      op.u.close.dom  = DOMID_SELF;
    5.43      op.u.close.port = evtchn;
    5.44      if ( HYPERVISOR_event_channel_op(&op) != 0 )
    5.45 -        BUG();
    5.46 +        panic("Failed to unbind physical IRQ %d\n", irq);
    5.47  
    5.48      evtchn_to_irq[evtchn] = -1;
    5.49      irq_to_evtchn[irq]    = -1;
     6.1 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c	Sat Mar 27 13:51:01 2004 +0000
     6.2 +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/traps.c	Sat Mar 27 15:29:57 2004 +0000
     6.3 @@ -549,6 +549,14 @@ asmlinkage void do_spurious_interrupt_bu
     6.4   */
     6.5  asmlinkage void math_state_restore(struct pt_regs regs)
     6.6  {
     6.7 +	/*
     6.8 +	 * A trap in kernel mode can be ignored. It'll be the fast XOR or
     6.9 +	 * copying libraries, which will correctly save/restore state and
    6.10 +	 * reset the TS bit in CR0.
    6.11 +	 */
    6.12 +	if ( (regs.xcs & 2) == 0 )
    6.13 +		return;
    6.14 +
    6.15  	if (current->used_math) {
    6.16  		restore_fpu(current);
    6.17  	} else {
     7.1 --- a/xenolinux-2.4.25-sparse/include/asm-xen/system.h	Sat Mar 27 13:51:01 2004 +0000
     7.2 +++ b/xenolinux-2.4.25-sparse/include/asm-xen/system.h	Sat Mar 27 15:29:57 2004 +0000
     7.3 @@ -107,33 +107,12 @@ static inline unsigned long _get_base(ch
     7.4  		".previous"			\
     7.5  		: :"m" (*(unsigned int *)&(value)))
     7.6  
     7.7 +/* NB. 'clts' is done for us by Xen during virtual trap. */
     7.8  #define clts() ((void)0)
     7.9 -#define read_cr0() ({ \
    7.10 -	unsigned int __dummy; \
    7.11 -	__asm__( \
    7.12 -		"movl %%cr0,%0\n\t" \
    7.13 -		:"=r" (__dummy)); \
    7.14 -	__dummy; \
    7.15 -})
    7.16 -#define write_cr0(x) \
    7.17 -	__asm__("movl %0,%%cr0": :"r" (x));
    7.18 -
    7.19 -#define read_cr4() ({ \
    7.20 -	unsigned int __dummy; \
    7.21 -	__asm__( \
    7.22 -		"movl %%cr4,%0\n\t" \
    7.23 -		:"=r" (__dummy)); \
    7.24 -	__dummy; \
    7.25 -})
    7.26 -#define write_cr4(x) \
    7.27 -	__asm__("movl %0,%%cr4": :"r" (x));
    7.28  #define stts() (HYPERVISOR_fpu_taskswitch())
    7.29  
    7.30  #endif	/* __KERNEL__ */
    7.31  
    7.32 -#define wbinvd() \
    7.33 -	__asm__ __volatile__ ("wbinvd": : :"memory");
    7.34 -
    7.35  static inline unsigned long get_limit(unsigned long segment)
    7.36  {
    7.37  	unsigned long __limit;
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xenolinux-2.4.25-sparse/include/asm-xen/xor.h	Sat Mar 27 15:29:57 2004 +0000
     8.3 @@ -0,0 +1,879 @@
     8.4 +/*
     8.5 + * include/asm-i386/xor.h
     8.6 + *
     8.7 + * Optimized RAID-5 checksumming functions for MMX and SSE.
     8.8 + *
     8.9 + * This program is free software; you can redistribute it and/or modify
    8.10 + * it under the terms of the GNU General Public License as published by
    8.11 + * the Free Software Foundation; either version 2, or (at your option)
    8.12 + * any later version.
    8.13 + *
    8.14 + * You should have received a copy of the GNU General Public License
    8.15 + * (for example /usr/src/linux/COPYING); if not, write to the Free
    8.16 + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    8.17 + */
    8.18 +
    8.19 +/*
    8.20 + * High-speed RAID5 checksumming functions utilizing MMX instructions.
    8.21 + * Copyright (C) 1998 Ingo Molnar.
    8.22 + */
    8.23 +
    8.24 +#define FPU_SAVE							\
    8.25 +  do {									\
    8.26 +	if (!(current->flags & PF_USEDFPU))				\
    8.27 +		clts();							\
    8.28 +	__asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0]));	\
    8.29 +  } while (0)
    8.30 +
    8.31 +#define FPU_RESTORE							\
    8.32 +  do {									\
    8.33 +	__asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0]));		\
    8.34 +	if (!(current->flags & PF_USEDFPU))				\
    8.35 +		stts();							\
    8.36 +  } while (0)
    8.37 +
    8.38 +#define LD(x,y)		"       movq   8*("#x")(%1), %%mm"#y"   ;\n"
    8.39 +#define ST(x,y)		"       movq %%mm"#y",   8*("#x")(%1)   ;\n"
    8.40 +#define XO1(x,y)	"       pxor   8*("#x")(%2), %%mm"#y"   ;\n"
    8.41 +#define XO2(x,y)	"       pxor   8*("#x")(%3), %%mm"#y"   ;\n"
    8.42 +#define XO3(x,y)	"       pxor   8*("#x")(%4), %%mm"#y"   ;\n"
    8.43 +#define XO4(x,y)	"       pxor   8*("#x")(%5), %%mm"#y"   ;\n"
    8.44 +
    8.45 +
    8.46 +static void
    8.47 +xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
    8.48 +{
    8.49 +	unsigned long lines = bytes >> 7;
    8.50 +	char fpu_save[108];
    8.51 +
    8.52 +	FPU_SAVE;
    8.53 +
    8.54 +	__asm__ __volatile__ (
    8.55 +#undef BLOCK
    8.56 +#define BLOCK(i) \
    8.57 +	LD(i,0)					\
    8.58 +		LD(i+1,1)			\
    8.59 +			LD(i+2,2)		\
    8.60 +				LD(i+3,3)	\
    8.61 +	XO1(i,0)				\
    8.62 +	ST(i,0)					\
    8.63 +		XO1(i+1,1)			\
    8.64 +		ST(i+1,1)			\
    8.65 +			XO1(i+2,2)		\
    8.66 +			ST(i+2,2)		\
    8.67 +				XO1(i+3,3)	\
    8.68 +				ST(i+3,3)
    8.69 +
    8.70 +	" .align 32			;\n"
    8.71 +  	" 1:                            ;\n"
    8.72 +
    8.73 +	BLOCK(0)
    8.74 +	BLOCK(4)
    8.75 +	BLOCK(8)
    8.76 +	BLOCK(12)
    8.77 +
    8.78 +	"       addl $128, %1         ;\n"
    8.79 +	"       addl $128, %2         ;\n"
    8.80 +	"       decl %0               ;\n"
    8.81 +	"       jnz 1b                ;\n"
    8.82 +	: "+r" (lines),
    8.83 +	  "+r" (p1), "+r" (p2)
    8.84 +	:
    8.85 +	: "memory");
    8.86 +
    8.87 +	FPU_RESTORE;
    8.88 +}
    8.89 +
    8.90 +static void
    8.91 +xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
    8.92 +	      unsigned long *p3)
    8.93 +{
    8.94 +	unsigned long lines = bytes >> 7;
    8.95 +	char fpu_save[108];
    8.96 +
    8.97 +	FPU_SAVE;
    8.98 +
    8.99 +	__asm__ __volatile__ (
   8.100 +#undef BLOCK
   8.101 +#define BLOCK(i) \
   8.102 +	LD(i,0)					\
   8.103 +		LD(i+1,1)			\
   8.104 +			LD(i+2,2)		\
   8.105 +				LD(i+3,3)	\
   8.106 +	XO1(i,0)				\
   8.107 +		XO1(i+1,1)			\
   8.108 +			XO1(i+2,2)		\
   8.109 +				XO1(i+3,3)	\
   8.110 +	XO2(i,0)				\
   8.111 +	ST(i,0)					\
   8.112 +		XO2(i+1,1)			\
   8.113 +		ST(i+1,1)			\
   8.114 +			XO2(i+2,2)		\
   8.115 +			ST(i+2,2)		\
   8.116 +				XO2(i+3,3)	\
   8.117 +				ST(i+3,3)
   8.118 +
   8.119 +	" .align 32			;\n"
   8.120 +	" 1:                            ;\n"
   8.121 +
   8.122 +	BLOCK(0)
   8.123 +	BLOCK(4)
   8.124 +	BLOCK(8)
   8.125 +	BLOCK(12)
   8.126 +
   8.127 +	"       addl $128, %1         ;\n"
   8.128 +	"       addl $128, %2         ;\n"
   8.129 +	"       addl $128, %3         ;\n"
   8.130 +	"       decl %0               ;\n"
   8.131 +	"       jnz 1b                ;\n"
   8.132 +	: "+r" (lines),
   8.133 +	  "+r" (p1), "+r" (p2), "+r" (p3)
   8.134 +	:
   8.135 +	: "memory");
   8.136 +
   8.137 +	FPU_RESTORE;
   8.138 +}
   8.139 +
   8.140 +static void
   8.141 +xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.142 +	      unsigned long *p3, unsigned long *p4)
   8.143 +{
   8.144 +	unsigned long lines = bytes >> 7;
   8.145 +	char fpu_save[108];
   8.146 +
   8.147 +	FPU_SAVE;
   8.148 +
   8.149 +	__asm__ __volatile__ (
   8.150 +#undef BLOCK
   8.151 +#define BLOCK(i) \
   8.152 +	LD(i,0)					\
   8.153 +		LD(i+1,1)			\
   8.154 +			LD(i+2,2)		\
   8.155 +				LD(i+3,3)	\
   8.156 +	XO1(i,0)				\
   8.157 +		XO1(i+1,1)			\
   8.158 +			XO1(i+2,2)		\
   8.159 +				XO1(i+3,3)	\
   8.160 +	XO2(i,0)				\
   8.161 +		XO2(i+1,1)			\
   8.162 +			XO2(i+2,2)		\
   8.163 +				XO2(i+3,3)	\
   8.164 +	XO3(i,0)				\
   8.165 +	ST(i,0)					\
   8.166 +		XO3(i+1,1)			\
   8.167 +		ST(i+1,1)			\
   8.168 +			XO3(i+2,2)		\
   8.169 +			ST(i+2,2)		\
   8.170 +				XO3(i+3,3)	\
   8.171 +				ST(i+3,3)
   8.172 +
   8.173 +	" .align 32			;\n"
   8.174 +	" 1:                            ;\n"
   8.175 +
   8.176 +	BLOCK(0)
   8.177 +	BLOCK(4)
   8.178 +	BLOCK(8)
   8.179 +	BLOCK(12)
   8.180 +
   8.181 +	"       addl $128, %1         ;\n"
   8.182 +	"       addl $128, %2         ;\n"
   8.183 +	"       addl $128, %3         ;\n"
   8.184 +	"       addl $128, %4         ;\n"
   8.185 +	"       decl %0               ;\n"
   8.186 +	"       jnz 1b                ;\n"
   8.187 +	: "+r" (lines),
   8.188 +	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
   8.189 +	:
   8.190 +	: "memory");
   8.191 +
   8.192 +	FPU_RESTORE;
   8.193 +}
   8.194 +
   8.195 +
   8.196 +static void
   8.197 +xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.198 +	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
   8.199 +{
   8.200 +	unsigned long lines = bytes >> 7;
   8.201 +	char fpu_save[108];
   8.202 +
   8.203 +	FPU_SAVE;
   8.204 +
   8.205 +	/* Need to save/restore p4/p5 manually; otherwise gcc's 10-operand
   8.206 +	   limit gets exceeded (each "+" operand counts as two). */
   8.207 +	__asm__ __volatile__ (
   8.208 +		"  pushl %4\n"
   8.209 +		"  pushl %5\n"
   8.210 +#undef BLOCK
   8.211 +#define BLOCK(i) \
   8.212 +	LD(i,0)					\
   8.213 +		LD(i+1,1)			\
   8.214 +			LD(i+2,2)		\
   8.215 +				LD(i+3,3)	\
   8.216 +	XO1(i,0)				\
   8.217 +		XO1(i+1,1)			\
   8.218 +			XO1(i+2,2)		\
   8.219 +				XO1(i+3,3)	\
   8.220 +	XO2(i,0)				\
   8.221 +		XO2(i+1,1)			\
   8.222 +			XO2(i+2,2)		\
   8.223 +				XO2(i+3,3)	\
   8.224 +	XO3(i,0)				\
   8.225 +		XO3(i+1,1)			\
   8.226 +			XO3(i+2,2)		\
   8.227 +				XO3(i+3,3)	\
   8.228 +	XO4(i,0)				\
   8.229 +	ST(i,0)					\
   8.230 +		XO4(i+1,1)			\
   8.231 +		ST(i+1,1)			\
   8.232 +			XO4(i+2,2)		\
   8.233 +			ST(i+2,2)		\
   8.234 +				XO4(i+3,3)	\
   8.235 +				ST(i+3,3)
   8.236 +
   8.237 +	" .align 32			;\n"
   8.238 +	" 1:                            ;\n"
   8.239 +
   8.240 +	BLOCK(0)
   8.241 +	BLOCK(4)
   8.242 +	BLOCK(8)
   8.243 +	BLOCK(12)
   8.244 +
   8.245 +	"       addl $128, %1         ;\n"
   8.246 +	"       addl $128, %2         ;\n"
   8.247 +	"       addl $128, %3         ;\n"
   8.248 +	"       addl $128, %4         ;\n"
   8.249 +	"       addl $128, %5         ;\n"
   8.250 +	"       decl %0               ;\n"
   8.251 +	"       jnz 1b                ;\n"
   8.252 +	"	popl %5\n"
   8.253 +	"	popl %4\n"
   8.254 +	: "+r" (lines),
   8.255 +	  "+r" (p1), "+r" (p2), "+r" (p3)
   8.256 +	: "r" (p4), "r" (p5) 
   8.257 +	: "memory");
   8.258 +
   8.259 +	FPU_RESTORE;
   8.260 +}
   8.261 +
   8.262 +#undef LD
   8.263 +#undef XO1
   8.264 +#undef XO2
   8.265 +#undef XO3
   8.266 +#undef XO4
   8.267 +#undef ST
   8.268 +#undef BLOCK
   8.269 +
   8.270 +static void
   8.271 +xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
   8.272 +{
   8.273 +	unsigned long lines = bytes >> 6;
   8.274 +	char fpu_save[108];
   8.275 +
   8.276 +	FPU_SAVE;
   8.277 +
   8.278 +	__asm__ __volatile__ (
   8.279 +	" .align 32	             ;\n"
   8.280 +	" 1:                         ;\n"
   8.281 +	"       movq   (%1), %%mm0   ;\n"
   8.282 +	"       movq  8(%1), %%mm1   ;\n"
   8.283 +	"       pxor   (%2), %%mm0   ;\n"
   8.284 +	"       movq 16(%1), %%mm2   ;\n"
   8.285 +	"       movq %%mm0,   (%1)   ;\n"
   8.286 +	"       pxor  8(%2), %%mm1   ;\n"
   8.287 +	"       movq 24(%1), %%mm3   ;\n"
   8.288 +	"       movq %%mm1,  8(%1)   ;\n"
   8.289 +	"       pxor 16(%2), %%mm2   ;\n"
   8.290 +	"       movq 32(%1), %%mm4   ;\n"
   8.291 +	"       movq %%mm2, 16(%1)   ;\n"
   8.292 +	"       pxor 24(%2), %%mm3   ;\n"
   8.293 +	"       movq 40(%1), %%mm5   ;\n"
   8.294 +	"       movq %%mm3, 24(%1)   ;\n"
   8.295 +	"       pxor 32(%2), %%mm4   ;\n"
   8.296 +	"       movq 48(%1), %%mm6   ;\n"
   8.297 +	"       movq %%mm4, 32(%1)   ;\n"
   8.298 +	"       pxor 40(%2), %%mm5   ;\n"
   8.299 +	"       movq 56(%1), %%mm7   ;\n"
   8.300 +	"       movq %%mm5, 40(%1)   ;\n"
   8.301 +	"       pxor 48(%2), %%mm6   ;\n"
   8.302 +	"       pxor 56(%2), %%mm7   ;\n"
   8.303 +	"       movq %%mm6, 48(%1)   ;\n"
   8.304 +	"       movq %%mm7, 56(%1)   ;\n"
   8.305 +	
   8.306 +	"       addl $64, %1         ;\n"
   8.307 +	"       addl $64, %2         ;\n"
   8.308 +	"       decl %0              ;\n"
   8.309 +	"       jnz 1b               ;\n"
   8.310 +	: "+r" (lines),
   8.311 +	  "+r" (p1), "+r" (p2)
   8.312 +	:
   8.313 +	: "memory");
   8.314 +
   8.315 +	FPU_RESTORE;
   8.316 +}
   8.317 +
   8.318 +static void
   8.319 +xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.320 +	     unsigned long *p3)
   8.321 +{
   8.322 +	unsigned long lines = bytes >> 6;
   8.323 +	char fpu_save[108];
   8.324 +
   8.325 +	FPU_SAVE;
   8.326 +
   8.327 +	__asm__ __volatile__ (
   8.328 +	" .align 32,0x90             ;\n"
   8.329 +	" 1:                         ;\n"
   8.330 +	"       movq   (%1), %%mm0   ;\n"
   8.331 +	"       movq  8(%1), %%mm1   ;\n"
   8.332 +	"       pxor   (%2), %%mm0   ;\n"
   8.333 +	"       movq 16(%1), %%mm2   ;\n"
   8.334 +	"       pxor  8(%2), %%mm1   ;\n"
   8.335 +	"       pxor   (%3), %%mm0   ;\n"
   8.336 +	"       pxor 16(%2), %%mm2   ;\n"
   8.337 +	"       movq %%mm0,   (%1)   ;\n"
   8.338 +	"       pxor  8(%3), %%mm1   ;\n"
   8.339 +	"       pxor 16(%3), %%mm2   ;\n"
   8.340 +	"       movq 24(%1), %%mm3   ;\n"
   8.341 +	"       movq %%mm1,  8(%1)   ;\n"
   8.342 +	"       movq 32(%1), %%mm4   ;\n"
   8.343 +	"       movq 40(%1), %%mm5   ;\n"
   8.344 +	"       pxor 24(%2), %%mm3   ;\n"
   8.345 +	"       movq %%mm2, 16(%1)   ;\n"
   8.346 +	"       pxor 32(%2), %%mm4   ;\n"
   8.347 +	"       pxor 24(%3), %%mm3   ;\n"
   8.348 +	"       pxor 40(%2), %%mm5   ;\n"
   8.349 +	"       movq %%mm3, 24(%1)   ;\n"
   8.350 +	"       pxor 32(%3), %%mm4   ;\n"
   8.351 +	"       pxor 40(%3), %%mm5   ;\n"
   8.352 +	"       movq 48(%1), %%mm6   ;\n"
   8.353 +	"       movq %%mm4, 32(%1)   ;\n"
   8.354 +	"       movq 56(%1), %%mm7   ;\n"
   8.355 +	"       pxor 48(%2), %%mm6   ;\n"
   8.356 +	"       movq %%mm5, 40(%1)   ;\n"
   8.357 +	"       pxor 56(%2), %%mm7   ;\n"
   8.358 +	"       pxor 48(%3), %%mm6   ;\n"
   8.359 +	"       pxor 56(%3), %%mm7   ;\n"
   8.360 +	"       movq %%mm6, 48(%1)   ;\n"
   8.361 +	"       movq %%mm7, 56(%1)   ;\n"
   8.362 +      
   8.363 +	"       addl $64, %1         ;\n"
   8.364 +	"       addl $64, %2         ;\n"
   8.365 +	"       addl $64, %3         ;\n"
   8.366 +	"       decl %0              ;\n"
   8.367 +	"       jnz 1b               ;\n"
   8.368 +	: "+r" (lines),
   8.369 +	  "+r" (p1), "+r" (p2), "+r" (p3)
   8.370 +	:
   8.371 +	: "memory" );
   8.372 +
   8.373 +	FPU_RESTORE;
   8.374 +}
   8.375 +
   8.376 +static void
   8.377 +xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.378 +	     unsigned long *p3, unsigned long *p4)
   8.379 +{
   8.380 +	unsigned long lines = bytes >> 6;
   8.381 +	char fpu_save[108];
   8.382 +
   8.383 +	FPU_SAVE;
   8.384 +
   8.385 +	__asm__ __volatile__ (
   8.386 +	" .align 32,0x90             ;\n"
   8.387 +	" 1:                         ;\n"
   8.388 +	"       movq   (%1), %%mm0   ;\n"
   8.389 +	"       movq  8(%1), %%mm1   ;\n"
   8.390 +	"       pxor   (%2), %%mm0   ;\n"
   8.391 +	"       movq 16(%1), %%mm2   ;\n"
   8.392 +	"       pxor  8(%2), %%mm1   ;\n"
   8.393 +	"       pxor   (%3), %%mm0   ;\n"
   8.394 +	"       pxor 16(%2), %%mm2   ;\n"
   8.395 +	"       pxor  8(%3), %%mm1   ;\n"
   8.396 +	"       pxor   (%4), %%mm0   ;\n"
   8.397 +	"       movq 24(%1), %%mm3   ;\n"
   8.398 +	"       pxor 16(%3), %%mm2   ;\n"
   8.399 +	"       pxor  8(%4), %%mm1   ;\n"
   8.400 +	"       movq %%mm0,   (%1)   ;\n"
   8.401 +	"       movq 32(%1), %%mm4   ;\n"
   8.402 +	"       pxor 24(%2), %%mm3   ;\n"
   8.403 +	"       pxor 16(%4), %%mm2   ;\n"
   8.404 +	"       movq %%mm1,  8(%1)   ;\n"
   8.405 +	"       movq 40(%1), %%mm5   ;\n"
   8.406 +	"       pxor 32(%2), %%mm4   ;\n"
   8.407 +	"       pxor 24(%3), %%mm3   ;\n"
   8.408 +	"       movq %%mm2, 16(%1)   ;\n"
   8.409 +	"       pxor 40(%2), %%mm5   ;\n"
   8.410 +	"       pxor 32(%3), %%mm4   ;\n"
   8.411 +	"       pxor 24(%4), %%mm3   ;\n"
   8.412 +	"       movq %%mm3, 24(%1)   ;\n"
   8.413 +	"       movq 56(%1), %%mm7   ;\n"
   8.414 +	"       movq 48(%1), %%mm6   ;\n"
   8.415 +	"       pxor 40(%3), %%mm5   ;\n"
   8.416 +	"       pxor 32(%4), %%mm4   ;\n"
   8.417 +	"       pxor 48(%2), %%mm6   ;\n"
   8.418 +	"       movq %%mm4, 32(%1)   ;\n"
   8.419 +	"       pxor 56(%2), %%mm7   ;\n"
   8.420 +	"       pxor 40(%4), %%mm5   ;\n"
   8.421 +	"       pxor 48(%3), %%mm6   ;\n"
   8.422 +	"       pxor 56(%3), %%mm7   ;\n"
   8.423 +	"       movq %%mm5, 40(%1)   ;\n"
   8.424 +	"       pxor 48(%4), %%mm6   ;\n"
   8.425 +	"       pxor 56(%4), %%mm7   ;\n"
   8.426 +	"       movq %%mm6, 48(%1)   ;\n"
   8.427 +	"       movq %%mm7, 56(%1)   ;\n"
   8.428 +      
   8.429 +	"       addl $64, %1         ;\n"
   8.430 +	"       addl $64, %2         ;\n"
   8.431 +	"       addl $64, %3         ;\n"
   8.432 +	"       addl $64, %4         ;\n"
   8.433 +	"       decl %0              ;\n"
   8.434 +	"       jnz 1b               ;\n"
   8.435 +	: "+r" (lines),
   8.436 +	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
   8.437 +	:
   8.438 +	: "memory");
   8.439 +
   8.440 +	FPU_RESTORE;
   8.441 +}
   8.442 +
   8.443 +static void
   8.444 +xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.445 +	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
   8.446 +{
   8.447 +	unsigned long lines = bytes >> 6;
   8.448 +	char fpu_save[108];
   8.449 +
   8.450 +	FPU_SAVE;
   8.451 +
   8.452 +	/* Save/restore p4/p5 manually so gcc's 10-operand limit is not exceeded. */
   8.453 +	__asm__ __volatile__ (
   8.454 +	"	pushl %4\n"
   8.455 +	"	pushl %5\n"        	
   8.456 +	" .align 32,0x90             ;\n"
   8.457 +	" 1:                         ;\n"
   8.458 +	"       movq   (%1), %%mm0   ;\n"
   8.459 +	"       movq  8(%1), %%mm1   ;\n"
   8.460 +	"       pxor   (%2), %%mm0   ;\n"
   8.461 +	"       pxor  8(%2), %%mm1   ;\n"
   8.462 +	"       movq 16(%1), %%mm2   ;\n"
   8.463 +	"       pxor   (%3), %%mm0   ;\n"
   8.464 +	"       pxor  8(%3), %%mm1   ;\n"
   8.465 +	"       pxor 16(%2), %%mm2   ;\n"
   8.466 +	"       pxor   (%4), %%mm0   ;\n"
   8.467 +	"       pxor  8(%4), %%mm1   ;\n"
   8.468 +	"       pxor 16(%3), %%mm2   ;\n"
   8.469 +	"       movq 24(%1), %%mm3   ;\n"
   8.470 +	"       pxor   (%5), %%mm0   ;\n"
   8.471 +	"       pxor  8(%5), %%mm1   ;\n"
   8.472 +	"       movq %%mm0,   (%1)   ;\n"
   8.473 +	"       pxor 16(%4), %%mm2   ;\n"
   8.474 +	"       pxor 24(%2), %%mm3   ;\n"
   8.475 +	"       movq %%mm1,  8(%1)   ;\n"
   8.476 +	"       pxor 16(%5), %%mm2   ;\n"
   8.477 +	"       pxor 24(%3), %%mm3   ;\n"
   8.478 +	"       movq 32(%1), %%mm4   ;\n"
   8.479 +	"       movq %%mm2, 16(%1)   ;\n"
   8.480 +	"       pxor 24(%4), %%mm3   ;\n"
   8.481 +	"       pxor 32(%2), %%mm4   ;\n"
   8.482 +	"       movq 40(%1), %%mm5   ;\n"
   8.483 +	"       pxor 24(%5), %%mm3   ;\n"
   8.484 +	"       pxor 32(%3), %%mm4   ;\n"
   8.485 +	"       pxor 40(%2), %%mm5   ;\n"
   8.486 +	"       movq %%mm3, 24(%1)   ;\n"
   8.487 +	"       pxor 32(%4), %%mm4   ;\n"
   8.488 +	"       pxor 40(%3), %%mm5   ;\n"
   8.489 +	"       movq 48(%1), %%mm6   ;\n"
   8.490 +	"       movq 56(%1), %%mm7   ;\n"
   8.491 +	"       pxor 32(%5), %%mm4   ;\n"
   8.492 +	"       pxor 40(%4), %%mm5   ;\n"
   8.493 +	"       pxor 48(%2), %%mm6   ;\n"
   8.494 +	"       pxor 56(%2), %%mm7   ;\n"
   8.495 +	"       movq %%mm4, 32(%1)   ;\n"
   8.496 +	"       pxor 48(%3), %%mm6   ;\n"
   8.497 +	"       pxor 56(%3), %%mm7   ;\n"
   8.498 +	"       pxor 40(%5), %%mm5   ;\n"
   8.499 +	"       pxor 48(%4), %%mm6   ;\n"
   8.500 +	"       pxor 56(%4), %%mm7   ;\n"
   8.501 +	"       movq %%mm5, 40(%1)   ;\n"
   8.502 +	"       pxor 48(%5), %%mm6   ;\n"
   8.503 +	"       pxor 56(%5), %%mm7   ;\n"
   8.504 +	"       movq %%mm6, 48(%1)   ;\n"
   8.505 +	"       movq %%mm7, 56(%1)   ;\n"
   8.506 +      
   8.507 +	"       addl $64, %1         ;\n"
   8.508 +	"       addl $64, %2         ;\n"
   8.509 +	"       addl $64, %3         ;\n"
   8.510 +	"       addl $64, %4         ;\n"
   8.511 +	"       addl $64, %5         ;\n"
   8.512 +	"       decl %0              ;\n"
   8.513 +	"       jnz 1b               ;\n"
   8.514 +	"	popl %5\n"
   8.515 +	"	popl %4\n"
   8.516 +	: "+g" (lines),
   8.517 +	  "+r" (p1), "+r" (p2), "+r" (p3)
   8.518 +	: "r" (p4), "r" (p5)
   8.519 +	: "memory");
   8.520 +
   8.521 +	FPU_RESTORE;
   8.522 +}
   8.523 +/* Template named "pII_mmx": its do_2..do_5 hooks point at xor_pII_mmx_2..xor_pII_mmx_5. */
   8.524 +static struct xor_block_template xor_block_pII_mmx = {
   8.525 +	name: "pII_mmx",
   8.526 +	do_2: xor_pII_mmx_2,
   8.527 +	do_3: xor_pII_mmx_3,
   8.528 +	do_4: xor_pII_mmx_4,
   8.529 +	do_5: xor_pII_mmx_5,
   8.530 +};
   8.531 +/* Template named "p5_mmx": its do_2..do_5 hooks point at xor_p5_mmx_2..xor_p5_mmx_5. */
   8.532 +static struct xor_block_template xor_block_p5_mmx = {
   8.533 +	name: "p5_mmx",
   8.534 +	do_2: xor_p5_mmx_2,
   8.535 +	do_3: xor_p5_mmx_3,
   8.536 +	do_4: xor_p5_mmx_4,
   8.537 +	do_5: xor_p5_mmx_5,
   8.538 +};
   8.539 +/* Retire the FPU_SAVE/FPU_RESTORE helpers; the SSE routines below use XMMS_SAVE/XMMS_RESTORE instead. */
   8.540 +#undef FPU_SAVE
   8.541 +#undef FPU_RESTORE
   8.542 +
   8.543 +/*
   8.544 + * Cache avoiding checksumming functions utilizing KNI instructions
   8.545 + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
   8.546 + */
   8.547 +/* Call clts() unless current has PF_USEDFPU set, then store xmm0-xmm3 into xmm_save (expects locals cr0 and xmm_save). */
   8.548 +#define XMMS_SAVE				\
   8.549 +	if (!(current->flags & PF_USEDFPU))	\
   8.550 +		clts();				\
   8.551 +	__asm__ __volatile__ ( 			\
   8.552 +		"movups %%xmm0,(%1)	;\n\t"	\
   8.553 +		"movups %%xmm1,0x10(%1)	;\n\t"	\
   8.554 +		"movups %%xmm2,0x20(%1)	;\n\t"	\
   8.555 +		"movups %%xmm3,0x30(%1)	;\n\t"	\
   8.556 +		: "=&r" (cr0)	/* %0: declared as an output but never referenced by the template */	\
   8.557 +		: "r" (xmm_save) 	/* %1: base address of the 64-byte save area */	\
   8.558 +		: "memory")
   8.559 +/* Issue sfence, reload xmm0-xmm3 from xmm_save, then call stts() unless current has PF_USEDFPU set. */
   8.560 +#define XMMS_RESTORE				\
   8.561 +	__asm__ __volatile__ ( 			\
   8.562 +		"sfence			;\n\t"	\
   8.563 +		"movups (%1),%%xmm0	;\n\t"	\
   8.564 +		"movups 0x10(%1),%%xmm1	;\n\t"	\
   8.565 +		"movups 0x20(%1),%%xmm2	;\n\t"	\
   8.566 +		"movups 0x30(%1),%%xmm3	;\n\t"	\
   8.567 +		:				\
   8.568 +		: "r" (cr0), "r" (xmm_save)	/* %0 (cr0) is passed in but not used by the template */	\
   8.569 +		: "memory");			\
   8.570 +	if (!(current->flags & PF_USEDFPU))	/* NOTE(review): macro ends in an unbraced if; use only as a complete statement */	\
   8.571 +		stts()
   8.572 +/* String fragments for the SSE asm bodies: x is a 16-byte chunk index, y an xmm register number, %N an asm operand. */
   8.573 +#define ALIGN16 __attribute__((aligned(16)))
   8.574 +
   8.575 +#define OFFS(x)		"16*("#x")"	/* byte offset of chunk x */
   8.576 +#define PF_OFFS(x)	"256+16*("#x")"	/* offset of chunk x, 256 bytes further ahead */
   8.577 +#define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"	/* prefetch ahead in operand %1 */
   8.578 +#define LD(x,y)		"       movaps   "OFFS(x)"(%1), %%xmm"#y"	;\n"	/* xmm y = chunk x of %1 */
   8.579 +#define ST(x,y)		"       movaps %%xmm"#y",   "OFFS(x)"(%1)	;\n"	/* chunk x of %1 = xmm y */
   8.580 +#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%2)		;\n"	/* PF1..PF5: same prefetch for operands %2..%6 */
   8.581 +#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%3)		;\n"
   8.582 +#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%4)		;\n"
   8.583 +#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%5)		;\n"
   8.584 +#define PF5(x)		"	prefetchnta "PF_OFFS(x)"(%6)		;\n"
   8.585 +#define XO1(x,y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"	;\n"	/* XO1..XO5: xmm y ^= chunk x of operand %2..%6 */
   8.586 +#define XO2(x,y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"	;\n"
   8.587 +#define XO3(x,y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"	;\n"
   8.588 +#define XO4(x,y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"	;\n"
   8.589 +#define XO5(x,y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"	;\n"
   8.590 +
   8.591 +/* XOR the buffer at p2 into the buffer at p1, 256 bytes per loop pass, working in xmm0-xmm3. */
   8.592 +static void
   8.593 +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
   8.594 +{
   8.595 +        unsigned long lines = bytes >> 8;	/* 256-byte passes; NOTE(review): 0 (bytes < 256) would wrap the decl/jnz loop - confirm callers */
   8.596 +	char xmm_save[16*4] ALIGN16;	/* save area for xmm0-xmm3 */
   8.597 +	int cr0;	/* operand named by XMMS_SAVE/XMMS_RESTORE */
   8.598 +
   8.599 +	XMMS_SAVE;
   8.600 +
   8.601 +        __asm__ __volatile__ (
   8.602 +#undef BLOCK	/* BLOCK(i): load chunks i..i+3 of p1, xor in p2, store back to p1 (64 bytes) */
   8.603 +#define BLOCK(i) \
   8.604 +		LD(i,0)					\
   8.605 +			LD(i+1,1)			\
   8.606 +		PF1(i)					\
   8.607 +				PF1(i+2)		\
   8.608 +				LD(i+2,2)		\
   8.609 +					LD(i+3,3)	\
   8.610 +		PF0(i+4)				\
   8.611 +				PF0(i+6)		\
   8.612 +		XO1(i,0)				\
   8.613 +			XO1(i+1,1)			\
   8.614 +				XO1(i+2,2)		\
   8.615 +					XO1(i+3,3)	\
   8.616 +		ST(i,0)					\
   8.617 +			ST(i+1,1)			\
   8.618 +				ST(i+2,2)		\
   8.619 +					ST(i+3,3)	\
   8.620 +
   8.621 +
   8.622 +		PF0(0)	/* issue the first prefetches on p1 before entering the loop */
   8.623 +				PF0(2)
   8.624 +
   8.625 +	" .align 32			;\n"
   8.626 +        " 1:                            ;\n"
   8.627 +
   8.628 +		BLOCK(0)
   8.629 +		BLOCK(4)
   8.630 +		BLOCK(8)
   8.631 +		BLOCK(12)
   8.632 +
   8.633 +        "       addl $256, %1           ;\n"	/* advance p1 */
   8.634 +        "       addl $256, %2           ;\n"	/* advance p2 */
   8.635 +        "       decl %0                 ;\n"	/* one pass done */
   8.636 +        "       jnz 1b                  ;\n"
   8.637 +	: "+r" (lines),
   8.638 +	  "+r" (p1), "+r" (p2)
   8.639 +	:
   8.640 +        : "memory");
   8.641 +
   8.642 +	XMMS_RESTORE;
   8.643 +}
   8.644 +/* XOR the buffers at p2 and p3 into the buffer at p1, 256 bytes per loop pass, working in xmm0-xmm3. */
   8.645 +static void
   8.646 +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.647 +	  unsigned long *p3)
   8.648 +{
   8.649 +        unsigned long lines = bytes >> 8;	/* 256-byte passes; NOTE(review): 0 (bytes < 256) would wrap the decl/jnz loop - confirm callers */
   8.650 +	char xmm_save[16*4] ALIGN16;	/* save area for xmm0-xmm3 */
   8.651 +	int cr0;	/* operand named by XMMS_SAVE/XMMS_RESTORE */
   8.652 +
   8.653 +	XMMS_SAVE;
   8.654 +
   8.655 +        __asm__ __volatile__ (
   8.656 +#undef BLOCK	/* BLOCK(i): load chunks i..i+3 of p1, xor in p2 then p3, store back to p1 (64 bytes) */
   8.657 +#define BLOCK(i) \
   8.658 +		PF1(i)					\
   8.659 +				PF1(i+2)		\
   8.660 +		LD(i,0)					\
   8.661 +			LD(i+1,1)			\
   8.662 +				LD(i+2,2)		\
   8.663 +					LD(i+3,3)	\
   8.664 +		PF2(i)					\
   8.665 +				PF2(i+2)		\
   8.666 +		PF0(i+4)				\
   8.667 +				PF0(i+6)		\
   8.668 +		XO1(i,0)				\
   8.669 +			XO1(i+1,1)			\
   8.670 +				XO1(i+2,2)		\
   8.671 +					XO1(i+3,3)	\
   8.672 +		XO2(i,0)				\
   8.673 +			XO2(i+1,1)			\
   8.674 +				XO2(i+2,2)		\
   8.675 +					XO2(i+3,3)	\
   8.676 +		ST(i,0)					\
   8.677 +			ST(i+1,1)			\
   8.678 +				ST(i+2,2)		\
   8.679 +					ST(i+3,3)	\
   8.680 +
   8.681 +
   8.682 +		PF0(0)	/* issue the first prefetches on p1 before entering the loop */
   8.683 +				PF0(2)
   8.684 +
   8.685 +	" .align 32			;\n"
   8.686 +        " 1:                            ;\n"
   8.687 +
   8.688 +		BLOCK(0)
   8.689 +		BLOCK(4)
   8.690 +		BLOCK(8)
   8.691 +		BLOCK(12)
   8.692 +
   8.693 +        "       addl $256, %1           ;\n"	/* advance p1 */
   8.694 +        "       addl $256, %2           ;\n"	/* advance p2 */
   8.695 +        "       addl $256, %3           ;\n"	/* advance p3 */
   8.696 +        "       decl %0                 ;\n"	/* one pass done */
   8.697 +        "       jnz 1b                  ;\n"
   8.698 +	: "+r" (lines),
   8.699 +	  "+r" (p1), "+r"(p2), "+r"(p3)
   8.700 +	:
   8.701 +        : "memory" );
   8.702 +
   8.703 +	XMMS_RESTORE;
   8.704 +}
   8.705 +/* XOR the buffers at p2, p3 and p4 into the buffer at p1, 256 bytes per loop pass, working in xmm0-xmm3. */
   8.706 +static void
   8.707 +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.708 +	  unsigned long *p3, unsigned long *p4)
   8.709 +{
   8.710 +        unsigned long lines = bytes >> 8;	/* 256-byte passes; NOTE(review): 0 (bytes < 256) would wrap the decl/jnz loop - confirm callers */
   8.711 +	char xmm_save[16*4] ALIGN16;	/* save area for xmm0-xmm3 */
   8.712 +	int cr0;	/* operand named by XMMS_SAVE/XMMS_RESTORE */
   8.713 +
   8.714 +	XMMS_SAVE;
   8.715 +
   8.716 +        __asm__ __volatile__ (
   8.717 +#undef BLOCK	/* BLOCK(i): load chunks i..i+3 of p1, xor in p2, p3, p4, store back to p1 (64 bytes) */
   8.718 +#define BLOCK(i) \
   8.719 +		PF1(i)					\
   8.720 +				PF1(i+2)		\
   8.721 +		LD(i,0)					\
   8.722 +			LD(i+1,1)			\
   8.723 +				LD(i+2,2)		\
   8.724 +					LD(i+3,3)	\
   8.725 +		PF2(i)					\
   8.726 +				PF2(i+2)		\
   8.727 +		XO1(i,0)				\
   8.728 +			XO1(i+1,1)			\
   8.729 +				XO1(i+2,2)		\
   8.730 +					XO1(i+3,3)	\
   8.731 +		PF3(i)					\
   8.732 +				PF3(i+2)		\
   8.733 +		PF0(i+4)				\
   8.734 +				PF0(i+6)		\
   8.735 +		XO2(i,0)				\
   8.736 +			XO2(i+1,1)			\
   8.737 +				XO2(i+2,2)		\
   8.738 +					XO2(i+3,3)	\
   8.739 +		XO3(i,0)				\
   8.740 +			XO3(i+1,1)			\
   8.741 +				XO3(i+2,2)		\
   8.742 +					XO3(i+3,3)	\
   8.743 +		ST(i,0)					\
   8.744 +			ST(i+1,1)			\
   8.745 +				ST(i+2,2)		\
   8.746 +					ST(i+3,3)	\
   8.747 +
   8.748 +
   8.749 +		PF0(0)	/* issue the first prefetches on p1 before entering the loop */
   8.750 +				PF0(2)
   8.751 +
   8.752 +	" .align 32			;\n"
   8.753 +        " 1:                            ;\n"
   8.754 +
   8.755 +		BLOCK(0)
   8.756 +		BLOCK(4)
   8.757 +		BLOCK(8)
   8.758 +		BLOCK(12)
   8.759 +
   8.760 +        "       addl $256, %1           ;\n"	/* advance p1 */
   8.761 +        "       addl $256, %2           ;\n"	/* advance p2 */
   8.762 +        "       addl $256, %3           ;\n"	/* advance p3 */
   8.763 +        "       addl $256, %4           ;\n"	/* advance p4 */
   8.764 +        "       decl %0                 ;\n"	/* one pass done */
   8.765 +        "       jnz 1b                  ;\n"
   8.766 +	: "+r" (lines),
   8.767 +	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
   8.768 +	:
   8.769 +        : "memory" );
   8.770 +
   8.771 +	XMMS_RESTORE;
   8.772 +}
   8.773 +/* XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, 256 bytes per loop pass, working in xmm0-xmm3. */
   8.774 +static void
   8.775 +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
   8.776 +	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
   8.777 +{
   8.778 +        unsigned long lines = bytes >> 8;	/* 256-byte passes; NOTE(review): 0 (bytes < 256) would wrap the decl/jnz loop - confirm callers */
   8.779 +	char xmm_save[16*4] ALIGN16;	/* save area for xmm0-xmm3 */
   8.780 +	int cr0;	/* operand named by XMMS_SAVE/XMMS_RESTORE */
   8.781 +
   8.782 +	XMMS_SAVE;
   8.783 +
   8.784 +	/* p4/p5 are input-only operands that the loop advances; push/pop them by hand so the operand list does not exceed gcc's 10 argument limit */
   8.785 +        __asm__ __volatile__ (
   8.786 +		" pushl %4\n"
   8.787 +		" pushl %5\n"
   8.788 +#undef BLOCK	/* BLOCK(i): load chunks i..i+3 of p1, xor in p2, p3, p4, p5, store back to p1 (64 bytes) */
   8.789 +#define BLOCK(i) \
   8.790 +		PF1(i)					\
   8.791 +				PF1(i+2)		\
   8.792 +		LD(i,0)					\
   8.793 +			LD(i+1,1)			\
   8.794 +				LD(i+2,2)		\
   8.795 +					LD(i+3,3)	\
   8.796 +		PF2(i)					\
   8.797 +				PF2(i+2)		\
   8.798 +		XO1(i,0)				\
   8.799 +			XO1(i+1,1)			\
   8.800 +				XO1(i+2,2)		\
   8.801 +					XO1(i+3,3)	\
   8.802 +		PF3(i)					\
   8.803 +				PF3(i+2)		\
   8.804 +		XO2(i,0)				\
   8.805 +			XO2(i+1,1)			\
   8.806 +				XO2(i+2,2)		\
   8.807 +					XO2(i+3,3)	\
   8.808 +		PF4(i)					\
   8.809 +				PF4(i+2)		\
   8.810 +		PF0(i+4)				\
   8.811 +				PF0(i+6)		\
   8.812 +		XO3(i,0)				\
   8.813 +			XO3(i+1,1)			\
   8.814 +				XO3(i+2,2)		\
   8.815 +					XO3(i+3,3)	\
   8.816 +		XO4(i,0)				\
   8.817 +			XO4(i+1,1)			\
   8.818 +				XO4(i+2,2)		\
   8.819 +					XO4(i+3,3)	\
   8.820 +		ST(i,0)					\
   8.821 +			ST(i+1,1)			\
   8.822 +				ST(i+2,2)		\
   8.823 +					ST(i+3,3)	\
   8.824 +
   8.825 +
   8.826 +		PF0(0)	/* issue the first prefetches on p1 before entering the loop */
   8.827 +				PF0(2)
   8.828 +
   8.829 +	" .align 32			;\n"
   8.830 +        " 1:                            ;\n"
   8.831 +
   8.832 +		BLOCK(0)
   8.833 +		BLOCK(4)
   8.834 +		BLOCK(8)
   8.835 +		BLOCK(12)
   8.836 +
   8.837 +        "       addl $256, %1           ;\n"	/* advance p1 */
   8.838 +        "       addl $256, %2           ;\n"	/* advance p2 */
   8.839 +        "       addl $256, %3           ;\n"	/* advance p3 */
   8.840 +        "       addl $256, %4           ;\n"	/* advance p4 (restored by the popl below) */
   8.841 +        "       addl $256, %5           ;\n"	/* advance p5 (restored by the popl below) */
   8.842 +        "       decl %0                 ;\n"	/* one pass done */
   8.843 +        "       jnz 1b                  ;\n"
   8.844 +	"	popl %5\n"	
   8.845 +	"	popl %4\n"	
   8.846 +	: "+r" (lines),
   8.847 +	  "+r" (p1), "+r" (p2), "+r" (p3)
   8.848 +	: "r" (p4), "r" (p5)
   8.849 +	: "memory");
   8.850 +
   8.851 +	XMMS_RESTORE;
   8.852 +}
   8.853 +/* Template named "pIII_sse": its do_2..do_5 hooks point at xor_sse_2..xor_sse_5. */
   8.854 +static struct xor_block_template xor_block_pIII_sse = {
   8.855 +        name: "pIII_sse",
   8.856 +        do_2: xor_sse_2,
   8.857 +        do_3: xor_sse_3,
   8.858 +        do_4: xor_sse_4,
   8.859 +        do_5: xor_sse_5,
   8.860 +};
   8.861 +
   8.862 +/* Also try the generic routines.  */
   8.863 +#include <asm-generic/xor.h>
   8.864 +/* Templates passed to xor_speed(): 8regs and 32regs always, pIII_sse if cpu_has_xmm, both MMX ones if md_cpu_has_mmx(). */
   8.865 +#undef XOR_TRY_TEMPLATES	/* drop any earlier definition before redefining */
   8.866 +#define XOR_TRY_TEMPLATES				\
   8.867 +	do {						\
   8.868 +		xor_speed(&xor_block_8regs);		\
   8.869 +		xor_speed(&xor_block_32regs);		\
   8.870 +	        if (cpu_has_xmm)			\
   8.871 +			xor_speed(&xor_block_pIII_sse);	\
   8.872 +	        if (md_cpu_has_mmx()) {			\
   8.873 +	                xor_speed(&xor_block_pII_mmx);	\
   8.874 +	                xor_speed(&xor_block_p5_mmx);	\
   8.875 +	        }					\
   8.876 +	} while (0)
   8.877 +
   8.878 +/* If the CPU has SSE (cpu_has_xmm), select the SSE xor block regardless of
   8.879 +   FASTEST, because it can write around L2.  It may also be able to load into
   8.880 +   L1 only, depending on how the CPU handles a load to a line being prefetched.  */
   8.881 +#define XOR_SELECT_TEMPLATE(FASTEST) \
   8.882 +	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
     9.1 --- a/xenolinux-2.4.25-sparse/mkbuildtree	Sat Mar 27 13:51:01 2004 +0000
     9.2 +++ b/xenolinux-2.4.25-sparse/mkbuildtree	Sat Mar 27 15:29:57 2004 +0000
     9.3 @@ -196,7 +196,6 @@ ln -sf ../asm-i386/ucontext.h
     9.4  ln -sf ../asm-i386/unaligned.h
     9.5  ln -sf ../asm-i386/unistd.h 
     9.6  ln -sf ../asm-i386/user.h 
     9.7 -ln -sf ../asm-i386/xor.h 
     9.8  
     9.9  cd ../../arch/xen/kernel
    9.10  ln -sf ../../i386/kernel/i387.c