ia64/xen-unstable

changeset 3332:72b23176fb04

bitkeeper revision 1.1159.1.506 (41d00f89OifvQoN_EcnO5XuQN0RRjA)

Update to Linux 2.6.10.
author cl349@arcadians.cl.cam.ac.uk
date Mon Dec 27 13:35:05 2004 +0000 (2004-12-27)
parents 39a7a74fd6f9
children b2fa96909734
files	.rootkeys
	linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig
	linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig
	linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
	linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
	linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
	linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
	linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
	linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
	linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
	linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
	linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
	linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
	linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
	linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
	linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c
	linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c
	linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c
	linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile
	linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c
	linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h
	linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
	linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c
	linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c
	linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h
	linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
	linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h
line diff
     1.1 --- a/.rootkeys	Mon Dec 27 10:12:02 2004 +0000
     1.2 +++ b/.rootkeys	Mon Dec 27 13:35:05 2004 +0000
     1.3 @@ -145,11 +145,14 @@ 40f56238XDtHSijkAFlbv1PT8Bhw_Q linux-2.6
     1.4  40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S
     1.5  40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
     1.6  40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ioport.c
     1.7 +41d00d82zN8IfLBRxc7G_i7lbwT3cQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c
     1.8  40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ldt.c
     1.9  4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/pci-dma.c
    1.10  40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c
    1.11  40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/setup.c
    1.12  40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/signal.c
    1.13 +41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c
    1.14 +41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c
    1.15  40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/time.c
    1.16  40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/Makefile
    1.17  40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c
    1.18 @@ -175,6 +178,7 @@ 4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6
    1.19  412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c
    1.20  40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.10-xen-sparse/arch/xen/kernel/reboot.c
    1.21  414c113396tK1HTVeUalm3u-1DF16g linux-2.6.10-xen-sparse/arch/xen/kernel/skbuff.c
    1.22 +418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c
    1.23  3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.10-xen-sparse/arch/xen/kernel/xen_proc.c
    1.24  41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.10-xen-sparse/drivers/Makefile
    1.25  4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.10-xen-sparse/drivers/char/mem.c
    1.26 @@ -193,6 +197,12 @@ 40f562395atl9x4suKGhPkjqLOXESg linux-2.6
    1.27  40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c
    1.28  40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h
    1.29  40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c
    1.30 +41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile
    1.31 +41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c
    1.32 +41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h
    1.33 +41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
    1.34 +41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c
    1.35 +41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c
    1.36  40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.10-xen-sparse/drivers/xen/console/Makefile
    1.37  3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.10-xen-sparse/drivers/xen/console/console.c
    1.38  40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.10-xen-sparse/drivers/xen/evtchn/Makefile
    1.39 @@ -217,6 +227,7 @@ 40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6
    1.40  40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h
    1.41  40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h
    1.42  40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h
    1.43 +41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
    1.44  4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mmu_context.h
    1.45  40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h
    1.46  40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/param.h
    1.47 @@ -229,6 +240,7 @@ 40f5623aPCkQQfPtJSooGdhcatrvnQ linux-2.6
    1.48  412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/ptrace.h
    1.49  40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/segment.h
    1.50  40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/setup.h
    1.51 +4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h
    1.52  40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h
    1.53  40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h
    1.54  40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
    1.55 @@ -253,18 +265,6 @@ 40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6
    1.56  412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.10-xen-sparse/mm/memory.c
    1.57  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.10-xen-sparse/mm/page_alloc.c
    1.58  41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.10-xen-sparse/net/core/skbuff.c
    1.59 -41811cac4lkCB-fHir6CcxuEJ2pGsQ linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c
    1.60 -41811ca9mbGpqBrZVrUGEiv8CTV3ng linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c
    1.61 -418f90e4lGdeJK9rmbOB1kN-IKSjsQ linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c
    1.62 -41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile
    1.63 -41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c
    1.64 -41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h
    1.65 -41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
    1.66 -41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c
    1.67 -41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c
    1.68 -41a64cdeQ5SWVEVbSZ0K-IeHHhIJ_w linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h
    1.69 -41811f07Iri9hrvs97t-baxmhOwWDQ linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h
    1.70 -4198c32a8NzmcKVOzKaEJfaQxxiA0A linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h
    1.71  413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
    1.72  413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
    1.73  413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen
     2.1 --- a/linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig	Mon Dec 27 10:12:02 2004 +0000
     2.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig	Mon Dec 27 13:35:05 2004 +0000
     2.3 @@ -1,7 +1,7 @@
     2.4  #
     2.5  # Automatically generated make config: don't edit
     2.6 -# Linux kernel version: 2.6.10-rc3-xen0
     2.7 -# Sun Dec 26 10:34:29 2004
     2.8 +# Linux kernel version: 2.6.10-xen0
     2.9 +# Mon Dec 27 10:14:40 2004
    2.10  #
    2.11  CONFIG_XEN=y
    2.12  CONFIG_ARCH_XEN=y
    2.13 @@ -17,8 +17,8 @@ CONFIG_XEN_BLKDEV_BACKEND=y
    2.14  CONFIG_XEN_NETDEV_BACKEND=y
    2.15  CONFIG_XEN_BLKDEV_FRONTEND=y
    2.16  CONFIG_XEN_NETDEV_FRONTEND=y
    2.17 +# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
    2.18  # CONFIG_XEN_BLKDEV_TAP is not set
    2.19 -# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
    2.20  CONFIG_XEN_WRITABLE_PAGETABLES=y
    2.21  CONFIG_XEN_SCRUB_PAGES=y
    2.22  CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
     3.1 --- a/linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig	Mon Dec 27 10:12:02 2004 +0000
     3.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig	Mon Dec 27 13:35:05 2004 +0000
     3.3 @@ -1,7 +1,7 @@
     3.4  #
     3.5  # Automatically generated make config: don't edit
     3.6 -# Linux kernel version: 2.6.10-rc3-xenU
     3.7 -# Sun Dec 26 10:35:15 2004
     3.8 +# Linux kernel version: 2.6.10-xenU
     3.9 +# Mon Dec 27 10:15:03 2004
    3.10  #
    3.11  CONFIG_XEN=y
    3.12  CONFIG_ARCH_XEN=y
    3.13 @@ -13,12 +13,11 @@ CONFIG_NO_IDLE_HZ=y
    3.14  # CONFIG_XEN_PRIVILEGED_GUEST is not set
    3.15  # CONFIG_XEN_PHYSDEV_ACCESS is not set
    3.16  # CONFIG_XEN_BLKDEV_BACKEND is not set
    3.17 -# CONFIG_XEN_BLKDEV_TAP_BE is not set
    3.18  # CONFIG_XEN_NETDEV_BACKEND is not set
    3.19  CONFIG_XEN_BLKDEV_FRONTEND=y
    3.20  CONFIG_XEN_NETDEV_FRONTEND=y
    3.21 +# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
    3.22  # CONFIG_XEN_BLKDEV_TAP is not set
    3.23 -# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
    3.24  CONFIG_XEN_WRITABLE_PAGETABLES=y
    3.25  CONFIG_XEN_SCRUB_PAGES=y
    3.26  CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/irq.c	Mon Dec 27 13:35:05 2004 +0000
     4.3 @@ -0,0 +1,258 @@
     4.4 +/*
     4.5 + *	linux/arch/i386/kernel/irq.c
     4.6 + *
     4.7 + *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
     4.8 + *
     4.9 + * This file contains the lowest level x86-specific interrupt
    4.10 + * entry, irq-stacks and irq statistics code. All the remaining
    4.11 + * irq logic is done by the generic kernel/irq/ code and
    4.12 + * by the x86-specific irq controller code. (e.g. i8259.c and
    4.13 + * io_apic.c.)
    4.14 + */
    4.15 +
    4.16 +#include <asm/uaccess.h>
    4.17 +#include <linux/module.h>
    4.18 +#include <linux/seq_file.h>
    4.19 +#include <linux/interrupt.h>
    4.20 +#include <linux/kernel_stat.h>
    4.21 +
    4.22 +#ifndef CONFIG_X86_LOCAL_APIC
    4.23 +/*
    4.24 + * 'what should we do if we get a hw irq event on an illegal vector'.
    4.25 + * each architecture has to answer this themselves.
    4.26 + */
    4.27 +void ack_bad_irq(unsigned int irq)
    4.28 +{
    4.29 +	printk("unexpected IRQ trap at vector %02x\n", irq);
    4.30 +}
    4.31 +#endif
    4.32 +
    4.33 +#ifdef CONFIG_4KSTACKS
    4.34 +/*
    4.35 + * per-CPU IRQ handling contexts (thread information and stack)
    4.36 + */
    4.37 +union irq_ctx {
    4.38 +	struct thread_info      tinfo;
    4.39 +	u32                     stack[THREAD_SIZE/sizeof(u32)];
    4.40 +};
    4.41 +
    4.42 +static union irq_ctx *hardirq_ctx[NR_CPUS];
    4.43 +static union irq_ctx *softirq_ctx[NR_CPUS];
    4.44 +#endif
    4.45 +
    4.46 +/*
    4.47 + * do_IRQ handles all normal device IRQ's (the special
    4.48 + * SMP cross-CPU interrupts have their own specific
    4.49 + * handlers).
    4.50 + */
    4.51 +fastcall unsigned int do_IRQ(struct pt_regs *regs)
    4.52 +{	
    4.53 +	/* high bits used in ret_from_ code */
    4.54 +	int irq = regs->orig_eax & __IRQ_MASK(HARDIRQ_BITS);
    4.55 +#ifdef CONFIG_4KSTACKS
    4.56 +	union irq_ctx *curctx, *irqctx;
    4.57 +	u32 *isp;
    4.58 +#endif
    4.59 +
    4.60 +	irq_enter();
    4.61 +#ifdef CONFIG_DEBUG_STACKOVERFLOW
    4.62 +	/* Debugging check for stack overflow: is there less than 1KB free? */
    4.63 +	{
    4.64 +		long esp;
    4.65 +
    4.66 +		__asm__ __volatile__("andl %%esp,%0" :
    4.67 +					"=r" (esp) : "0" (THREAD_SIZE - 1));
    4.68 +		if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
    4.69 +			printk("do_IRQ: stack overflow: %ld\n",
    4.70 +				esp - sizeof(struct thread_info));
    4.71 +			dump_stack();
    4.72 +		}
    4.73 +	}
    4.74 +#endif
    4.75 +
    4.76 +#ifdef CONFIG_4KSTACKS
    4.77 +
    4.78 +	curctx = (union irq_ctx *) current_thread_info();
    4.79 +	irqctx = hardirq_ctx[smp_processor_id()];
    4.80 +
    4.81 +	/*
    4.82 +	 * this is where we switch to the IRQ stack. However, if we are
    4.83 +	 * already using the IRQ stack (because we interrupted a hardirq
    4.84 +	 * handler) we can't do that and just have to keep using the
    4.85 +	 * current stack (which is the irq stack already after all)
    4.86 +	 */
    4.87 +	if (curctx != irqctx) {
    4.88 +		int arg1, arg2, ebx;
    4.89 +
    4.90 +		/* build the stack frame on the IRQ stack */
    4.91 +		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
    4.92 +		irqctx->tinfo.task = curctx->tinfo.task;
    4.93 +		irqctx->tinfo.previous_esp = current_stack_pointer;
    4.94 +
    4.95 +		asm volatile(
    4.96 +			"       xchgl   %%ebx,%%esp      \n"
    4.97 +			"       call    __do_IRQ         \n"
    4.98 +			"       movl   %%ebx,%%esp      \n"
    4.99 +			: "=a" (arg1), "=d" (arg2), "=b" (ebx)
   4.100 +			:  "0" (irq),   "1" (regs),  "2" (isp)
   4.101 +			: "memory", "cc", "ecx"
   4.102 +		);
   4.103 +	} else
   4.104 +#endif
   4.105 +		__do_IRQ(irq, regs);
   4.106 +
   4.107 +	irq_exit();
   4.108 +
   4.109 +	return 1;
   4.110 +}
   4.111 +
   4.112 +#ifdef CONFIG_4KSTACKS
   4.113 +
   4.114 +/*
   4.115 + * These should really be __section__(".bss.page_aligned") as well, but
    4.116 + * gcc 3.0 and earlier don't handle that correctly.
   4.117 + */
   4.118 +static char softirq_stack[NR_CPUS * THREAD_SIZE]
   4.119 +		__attribute__((__aligned__(THREAD_SIZE)));
   4.120 +
   4.121 +static char hardirq_stack[NR_CPUS * THREAD_SIZE]
   4.122 +		__attribute__((__aligned__(THREAD_SIZE)));
   4.123 +
   4.124 +/*
   4.125 + * allocate per-cpu stacks for hardirq and for softirq processing
   4.126 + */
   4.127 +void irq_ctx_init(int cpu)
   4.128 +{
   4.129 +	union irq_ctx *irqctx;
   4.130 +
   4.131 +	if (hardirq_ctx[cpu])
   4.132 +		return;
   4.133 +
   4.134 +	irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
   4.135 +	irqctx->tinfo.task              = NULL;
   4.136 +	irqctx->tinfo.exec_domain       = NULL;
   4.137 +	irqctx->tinfo.cpu               = cpu;
   4.138 +	irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
   4.139 +	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
   4.140 +
   4.141 +	hardirq_ctx[cpu] = irqctx;
   4.142 +
   4.143 +	irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE];
   4.144 +	irqctx->tinfo.task              = NULL;
   4.145 +	irqctx->tinfo.exec_domain       = NULL;
   4.146 +	irqctx->tinfo.cpu               = cpu;
   4.147 +	irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
   4.148 +	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
   4.149 +
   4.150 +	softirq_ctx[cpu] = irqctx;
   4.151 +
   4.152 +	printk("CPU %u irqstacks, hard=%p soft=%p\n",
   4.153 +		cpu,hardirq_ctx[cpu],softirq_ctx[cpu]);
   4.154 +}
   4.155 +
   4.156 +extern asmlinkage void __do_softirq(void);
   4.157 +
   4.158 +asmlinkage void do_softirq(void)
   4.159 +{
   4.160 +	unsigned long flags;
   4.161 +	struct thread_info *curctx;
   4.162 +	union irq_ctx *irqctx;
   4.163 +	u32 *isp;
   4.164 +
   4.165 +	if (in_interrupt())
   4.166 +		return;
   4.167 +
   4.168 +	local_irq_save(flags);
   4.169 +
   4.170 +	if (local_softirq_pending()) {
   4.171 +		curctx = current_thread_info();
   4.172 +		irqctx = softirq_ctx[smp_processor_id()];
   4.173 +		irqctx->tinfo.task = curctx->task;
   4.174 +		irqctx->tinfo.previous_esp = current_stack_pointer;
   4.175 +
   4.176 +		/* build the stack frame on the softirq stack */
   4.177 +		isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
   4.178 +
   4.179 +		asm volatile(
   4.180 +			"       xchgl   %%ebx,%%esp     \n"
   4.181 +			"       call    __do_softirq    \n"
   4.182 +			"       movl    %%ebx,%%esp     \n"
   4.183 +			: "=b"(isp)
   4.184 +			: "0"(isp)
   4.185 +			: "memory", "cc", "edx", "ecx", "eax"
   4.186 +		);
   4.187 +	}
   4.188 +
   4.189 +	local_irq_restore(flags);
   4.190 +}
   4.191 +
   4.192 +EXPORT_SYMBOL(do_softirq);
   4.193 +#endif
   4.194 +
   4.195 +/*
   4.196 + * Interrupt statistics:
   4.197 + */
   4.198 +
   4.199 +atomic_t irq_err_count;
   4.200 +
   4.201 +/*
   4.202 + * /proc/interrupts printing:
   4.203 + */
   4.204 +
   4.205 +int show_interrupts(struct seq_file *p, void *v)
   4.206 +{
   4.207 +	int i = *(loff_t *) v, j;
   4.208 +	struct irqaction * action;
   4.209 +	unsigned long flags;
   4.210 +
   4.211 +	if (i == 0) {
   4.212 +		seq_printf(p, "           ");
   4.213 +		for (j=0; j<NR_CPUS; j++)
   4.214 +			if (cpu_online(j))
   4.215 +				seq_printf(p, "CPU%d       ",j);
   4.216 +		seq_putc(p, '\n');
   4.217 +	}
   4.218 +
   4.219 +	if (i < NR_IRQS) {
   4.220 +		spin_lock_irqsave(&irq_desc[i].lock, flags);
   4.221 +		action = irq_desc[i].action;
   4.222 +		if (!action)
   4.223 +			goto skip;
   4.224 +		seq_printf(p, "%3d: ",i);
   4.225 +#ifndef CONFIG_SMP
   4.226 +		seq_printf(p, "%10u ", kstat_irqs(i));
   4.227 +#else
   4.228 +		for (j = 0; j < NR_CPUS; j++)
   4.229 +			if (cpu_online(j))
   4.230 +				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
   4.231 +#endif
   4.232 +		seq_printf(p, " %14s", irq_desc[i].handler->typename);
   4.233 +		seq_printf(p, "  %s", action->name);
   4.234 +
   4.235 +		for (action=action->next; action; action = action->next)
   4.236 +			seq_printf(p, ", %s", action->name);
   4.237 +
   4.238 +		seq_putc(p, '\n');
   4.239 +skip:
   4.240 +		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
   4.241 +	} else if (i == NR_IRQS) {
   4.242 +		seq_printf(p, "NMI: ");
   4.243 +		for (j = 0; j < NR_CPUS; j++)
   4.244 +			if (cpu_online(j))
   4.245 +				seq_printf(p, "%10u ", nmi_count(j));
   4.246 +		seq_putc(p, '\n');
   4.247 +#ifdef CONFIG_X86_LOCAL_APIC
   4.248 +		seq_printf(p, "LOC: ");
   4.249 +		for (j = 0; j < NR_CPUS; j++)
   4.250 +			if (cpu_online(j))
   4.251 +				seq_printf(p, "%10u ",
   4.252 +					irq_stat[j].apic_timer_irqs);
   4.253 +		seq_putc(p, '\n');
   4.254 +#endif
   4.255 +		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
   4.256 +#if defined(CONFIG_X86_IO_APIC)
   4.257 +		seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
   4.258 +#endif
   4.259 +	}
   4.260 +	return 0;
   4.261 +}
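
The CONFIG_4KSTACKS code in the new irq.c above is mostly about running a handler on a different stack: do_IRQ() builds a frame on the per-CPU hardirq stack and swaps %esp around the call to __do_IRQ(). The same run-this-function-on-that-stack pattern can be sketched in portable userspace C with ucontext(3); the sketch below is an illustration only — the stack size and all names are made up for the example, and ucontext stands in for the kernel's inline assembly:

	#include <stdio.h>
	#include <ucontext.h>

	#define STACK_SZ (64 * 1024)

	static ucontext_t main_ctx, irq_ctx;
	static char irq_stack[STACK_SZ];	/* stands in for hardirq_stack */

	static void handler(void)
	{
		int probe;
		printf("handler running near %p (the separate stack)\n",
		       (void *)&probe);
		/* returning resumes main_ctx via uc_link */
	}

	int main(void)
	{
		int probe;
		printf("main running near %p\n", (void *)&probe);

		getcontext(&irq_ctx);
		irq_ctx.uc_stack.ss_sp = irq_stack;
		irq_ctx.uc_stack.ss_size = sizeof(irq_stack);
		irq_ctx.uc_link = &main_ctx;	/* where to resume on return */
		makecontext(&irq_ctx, handler, 0);

		/* cf. the "xchgl %ebx,%esp; call __do_IRQ" sequence above */
		swapcontext(&main_ctx, &irq_ctx);
		printf("back on the original stack\n");
		return 0;
	}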
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smp.c	Mon Dec 27 13:35:05 2004 +0000
     5.3 @@ -0,0 +1,599 @@
     5.4 +/*
     5.5 + *	Intel SMP support routines.
     5.6 + *
     5.7 + *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
     5.8 + *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
     5.9 + *
    5.10 + *	This code is released under the GNU General Public License version 2 or
    5.11 + *	later.
    5.12 + */
    5.13 +
    5.14 +#include <linux/init.h>
    5.15 +
    5.16 +#include <linux/mm.h>
    5.17 +#include <linux/irq.h>
    5.18 +#include <linux/delay.h>
    5.19 +#include <linux/spinlock.h>
    5.20 +#include <linux/smp_lock.h>
    5.21 +#include <linux/kernel_stat.h>
    5.22 +#include <linux/mc146818rtc.h>
    5.23 +#include <linux/cache.h>
    5.24 +#include <linux/interrupt.h>
    5.25 +
    5.26 +#include <asm/mtrr.h>
    5.27 +#include <asm/tlbflush.h>
    5.28 +#if 0
    5.29 +#include <mach_apic.h>
    5.30 +#endif
    5.31 +#include <asm-xen/evtchn.h>
    5.32 +
    5.33 +#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
    5.34 +
    5.35 +/*
    5.36 + *	Some notes on x86 processor bugs affecting SMP operation:
    5.37 + *
    5.38 + *	Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
    5.39 + *	The Linux implications for SMP are handled as follows:
    5.40 + *
    5.41 + *	Pentium III / [Xeon]
    5.42 + *		None of the E1AP-E3AP errata are visible to the user.
    5.43 + *
    5.44 + *	E1AP.	see PII A1AP
    5.45 + *	E2AP.	see PII A2AP
    5.46 + *	E3AP.	see PII A3AP
    5.47 + *
    5.48 + *	Pentium II / [Xeon]
    5.49 + *		None of the A1AP-A3AP errata are visible to the user.
    5.50 + *
    5.51 + *	A1AP.	see PPro 1AP
    5.52 + *	A2AP.	see PPro 2AP
    5.53 + *	A3AP.	see PPro 7AP
    5.54 + *
    5.55 + *	Pentium Pro
    5.56 + *		None of 1AP-9AP errata are visible to the normal user,
    5.57 + *	except occasional delivery of 'spurious interrupt' as trap #15.
    5.58 + *	This is very rare and a non-problem.
    5.59 + *
    5.60 + *	1AP.	Linux maps APIC as non-cacheable
    5.61 + *	2AP.	worked around in hardware
    5.62 + *	3AP.	fixed in C0 and above steppings microcode update.
    5.63 + *		Linux does not use excessive STARTUP_IPIs.
    5.64 + *	4AP.	worked around in hardware
    5.65 + *	5AP.	symmetric IO mode (normal Linux operation) not affected.
    5.66 + *		'noapic' mode has vector 0xf filled out properly.
    5.67 + *	6AP.	'noapic' mode might be affected - fixed in later steppings
     5.68 + *	7AP.	We do not assume writes to the LVT deasserting IRQs
    5.69 + *	8AP.	We do not enable low power mode (deep sleep) during MP bootup
    5.70 + *	9AP.	We do not use mixed mode
    5.71 + *
    5.72 + *	Pentium
    5.73 + *		There is a marginal case where REP MOVS on 100MHz SMP
    5.74 + *	machines with B stepping processors can fail. XXX should provide
    5.75 + *	an L1cache=Writethrough or L1cache=off option.
    5.76 + *
     5.77 + *		B stepping CPUs may hang. There are hardware workarounds
     5.78 + *	for this. We warn about it in case your board doesn't have the
     5.79 + *	workarounds. Basically that's so I can tell anyone with a B stepping
    5.80 + *	CPU and SMP problems "tough".
    5.81 + *
    5.82 + *	Specific items [From Pentium Processor Specification Update]
    5.83 + *
    5.84 + *	1AP.	Linux doesn't use remote read
    5.85 + *	2AP.	Linux doesn't trust APIC errors
    5.86 + *	3AP.	We work around this
    5.87 + *	4AP.	Linux never generated 3 interrupts of the same priority
    5.88 + *		to cause a lost local interrupt.
    5.89 + *	5AP.	Remote read is never used
    5.90 + *	6AP.	not affected - worked around in hardware
    5.91 + *	7AP.	not affected - worked around in hardware
    5.92 + *	8AP.	worked around in hardware - we get explicit CS errors if not
    5.93 + *	9AP.	only 'noapic' mode affected. Might generate spurious
    5.94 + *		interrupts, we log only the first one and count the
    5.95 + *		rest silently.
    5.96 + *	10AP.	not affected - worked around in hardware
    5.97 + *	11AP.	Linux reads the APIC between writes to avoid this, as per
    5.98 + *		the documentation. Make sure you preserve this as it affects
    5.99 + *		the C stepping chips too.
   5.100 + *	12AP.	not affected - worked around in hardware
   5.101 + *	13AP.	not affected - worked around in hardware
   5.102 + *	14AP.	we always deassert INIT during bootup
   5.103 + *	15AP.	not affected - worked around in hardware
   5.104 + *	16AP.	not affected - worked around in hardware
   5.105 + *	17AP.	not affected - worked around in hardware
   5.106 + *	18AP.	not affected - worked around in hardware
   5.107 + *	19AP.	not affected - worked around in BIOS
   5.108 + *
    5.109 + *	If this sounds worrying, believe me: these bugs are either ___RARE___,
    5.110 + *	or are signal timing bugs worked around in hardware, and there's
    5.111 + *	next to nothing of note with C stepping upwards.
   5.112 + */
   5.113 +
   5.114 +DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
   5.115 +
   5.116 +/*
   5.117 + * the following functions deal with sending IPIs between CPUs.
   5.118 + *
   5.119 + * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
   5.120 + */
   5.121 +
   5.122 +static inline int __prepare_ICR (unsigned int shortcut, int vector)
   5.123 +{
   5.124 +	return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
   5.125 +}
   5.126 +
   5.127 +static inline int __prepare_ICR2 (unsigned int mask)
   5.128 +{
   5.129 +	return SET_APIC_DEST_FIELD(mask);
   5.130 +}
   5.131 +
   5.132 +DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
   5.133 +
   5.134 +static inline void __send_IPI_one(unsigned int cpu, int vector)
   5.135 +{
   5.136 +	unsigned int evtchn;
   5.137 +
   5.138 +	evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
   5.139 +	// printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
   5.140 +	if (evtchn) {
   5.141 +#if 0
   5.142 +		shared_info_t *s = HYPERVISOR_shared_info;
   5.143 +		while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
   5.144 +		       synch_test_bit(evtchn, &s->evtchn_mask[0]))
   5.145 +			;
   5.146 +#endif
   5.147 +		notify_via_evtchn(evtchn);
   5.148 +	} else
    5.149 +		printk("send_IPI to unbound port %d/%d\n",
   5.150 +		       cpu, vector);
   5.151 +}
   5.152 +
   5.153 +void __send_IPI_shortcut(unsigned int shortcut, int vector)
   5.154 +{
   5.155 +	int cpu;
   5.156 +
   5.157 +	switch (shortcut) {
   5.158 +	case APIC_DEST_SELF:
   5.159 +		__send_IPI_one(smp_processor_id(), vector);
   5.160 +		break;
   5.161 +	case APIC_DEST_ALLBUT:
   5.162 +		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
   5.163 +			if (cpu == smp_processor_id())
   5.164 +				continue;
   5.165 +			if (cpu_isset(cpu, cpu_online_map)) {
   5.166 +				__send_IPI_one(cpu, vector);
   5.167 +			}
   5.168 +		}
   5.169 +		break;
   5.170 +	default:
   5.171 +		printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
   5.172 +		       vector);
   5.173 +		break;
   5.174 +	}
   5.175 +}
   5.176 +
   5.177 +void fastcall send_IPI_self(int vector)
   5.178 +{
   5.179 +	__send_IPI_shortcut(APIC_DEST_SELF, vector);
   5.180 +}
   5.181 +
   5.182 +/*
   5.183 + * This is only used on smaller machines.
   5.184 + */
   5.185 +void send_IPI_mask_bitmask(cpumask_t mask, int vector)
   5.186 +{
   5.187 +	unsigned long flags;
   5.188 +	unsigned int cpu;
   5.189 +
   5.190 +	local_irq_save(flags);
   5.191 +
   5.192 +	for (cpu = 0; cpu < NR_CPUS; ++cpu) {
   5.193 +		if (cpu_isset(cpu, mask)) {
   5.194 +			__send_IPI_one(cpu, vector);
   5.195 +		}
   5.196 +	}
   5.197 +
   5.198 +	local_irq_restore(flags);
   5.199 +}
   5.200 +
   5.201 +inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
   5.202 +{
   5.203 +
   5.204 +	send_IPI_mask_bitmask(mask, vector);
   5.205 +}
   5.206 +
   5.207 +#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
   5.208 +
   5.209 +/*
   5.210 + *	Smarter SMP flushing macros. 
   5.211 + *		c/o Linus Torvalds.
   5.212 + *
   5.213 + *	These mean you can really definitely utterly forget about
    5.214 + *	writing to user space from interrupts. (It's not allowed anyway.)
   5.215 + *
   5.216 + *	Optimizations Manfred Spraul <manfred@colorfullife.com>
   5.217 + */
   5.218 +
   5.219 +static cpumask_t flush_cpumask;
   5.220 +static struct mm_struct * flush_mm;
   5.221 +static unsigned long flush_va;
   5.222 +static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
   5.223 +#define FLUSH_ALL	0xffffffff
   5.224 +
   5.225 +/*
    5.226 + * We cannot call mmdrop() because we are in interrupt context;
    5.227 + * instead we update mm->cpu_vm_mask.
    5.228 + *
    5.229 + * We need to reload %cr3 since the page tables may be going
    5.230 + * away from under us.
   5.231 + */
   5.232 +static inline void leave_mm (unsigned long cpu)
   5.233 +{
   5.234 +	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
   5.235 +		BUG();
   5.236 +	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
   5.237 +	load_cr3(swapper_pg_dir);
   5.238 +}
   5.239 +
   5.240 +/*
   5.241 + *
   5.242 + * The flush IPI assumes that a thread switch happens in this order:
   5.243 + * [cpu0: the cpu that switches]
   5.244 + * 1) switch_mm() either 1a) or 1b)
   5.245 + * 1a) thread switch to a different mm
   5.246 + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
   5.247 + * 	Stop ipi delivery for the old mm. This is not synchronized with
    5.248 + * 	the other cpus, but smp_invalidate_interrupt ignores flush ipis
    5.249 + * 	for the wrong mm, and in the worst case we perform a superfluous
   5.250 + * 	tlb flush.
   5.251 + * 1a2) set cpu_tlbstate to TLBSTATE_OK
   5.252 + * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
   5.253 + *	was in lazy tlb mode.
   5.254 + * 1a3) update cpu_tlbstate[].active_mm
   5.255 + * 	Now cpu0 accepts tlb flushes for the new mm.
   5.256 + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
   5.257 + * 	Now the other cpus will send tlb flush ipis.
    5.258 + * 1a5) change cr3.
   5.259 + * 1b) thread switch without mm change
   5.260 + *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
   5.261 + *	flush ipis.
   5.262 + * 1b1) set cpu_tlbstate to TLBSTATE_OK
   5.263 + * 1b2) test_and_set the cpu bit in cpu_vm_mask.
   5.264 + * 	Atomically set the bit [other cpus will start sending flush ipis],
   5.265 + * 	and test the bit.
   5.266 + * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
   5.267 + * 2) switch %%esp, ie current
   5.268 + *
   5.269 + * The interrupt must handle 2 special cases:
   5.270 + * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
   5.271 + * - the cpu performs speculative tlb reads, i.e. even if the cpu only
   5.272 + *   runs in kernel space, the cpu could load tlb entries for user space
   5.273 + *   pages.
   5.274 + *
   5.275 + * The good news is that cpu_tlbstate is local to each cpu, no
   5.276 + * write/read ordering problems.
   5.277 + */
   5.278 +
   5.279 +/*
   5.280 + * TLB flush IPI:
   5.281 + *
   5.282 + * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
   5.283 + * 2) Leave the mm if we are in the lazy tlb mode.
   5.284 + */
   5.285 +
   5.286 +irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
   5.287 +				     struct pt_regs *regs)
   5.288 +{
   5.289 +	unsigned long cpu;
   5.290 +
   5.291 +	cpu = get_cpu();
   5.292 +
   5.293 +	if (!cpu_isset(cpu, flush_cpumask))
   5.294 +		goto out;
   5.295 +		/* 
   5.296 +		 * This was a BUG() but until someone can quote me the
   5.297 +		 * line from the intel manual that guarantees an IPI to
   5.298 +		 * multiple CPUs is retried _only_ on the erroring CPUs
    5.299 +		 * it's staying as a return
   5.300 +		 *
   5.301 +		 * BUG();
   5.302 +		 */
   5.303 +		 
   5.304 +	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
   5.305 +		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
   5.306 +			if (flush_va == FLUSH_ALL)
   5.307 +				local_flush_tlb();
   5.308 +			else
   5.309 +				__flush_tlb_one(flush_va);
   5.310 +		} else
   5.311 +			leave_mm(cpu);
   5.312 +	}
   5.313 +	smp_mb__before_clear_bit();
   5.314 +	cpu_clear(cpu, flush_cpumask);
   5.315 +	smp_mb__after_clear_bit();
   5.316 +out:
   5.317 +	put_cpu_no_resched();
   5.318 +
   5.319 +	return IRQ_HANDLED;
   5.320 +}
   5.321 +
   5.322 +static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
   5.323 +						unsigned long va)
   5.324 +{
   5.325 +	cpumask_t tmp;
   5.326 +	/*
   5.327 +	 * A couple of (to be removed) sanity checks:
   5.328 +	 *
   5.329 +	 * - we do not send IPIs to not-yet booted CPUs.
   5.330 +	 * - current CPU must not be in mask
   5.331 +	 * - mask must exist :)
   5.332 +	 */
   5.333 +	BUG_ON(cpus_empty(cpumask));
   5.334 +
   5.335 +	cpus_and(tmp, cpumask, cpu_online_map);
   5.336 +	BUG_ON(!cpus_equal(cpumask, tmp));
   5.337 +	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
   5.338 +	BUG_ON(!mm);
   5.339 +
   5.340 +	/*
    5.341 +	 * I'm not happy about this global shared spinlock in the
   5.342 +	 * MM hot path, but we'll see how contended it is.
   5.343 +	 * Temporarily this turns IRQs off, so that lockups are
   5.344 +	 * detected by the NMI watchdog.
   5.345 +	 */
   5.346 +	spin_lock(&tlbstate_lock);
   5.347 +	
   5.348 +	flush_mm = mm;
   5.349 +	flush_va = va;
   5.350 +#if NR_CPUS <= BITS_PER_LONG
   5.351 +	atomic_set_mask(cpumask, &flush_cpumask);
   5.352 +#else
   5.353 +	{
   5.354 +		int k;
   5.355 +		unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
   5.356 +		unsigned long *cpu_mask = (unsigned long *)&cpumask;
   5.357 +		for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
   5.358 +			atomic_set_mask(cpu_mask[k], &flush_mask[k]);
   5.359 +	}
   5.360 +#endif
   5.361 +	/*
   5.362 +	 * We have to send the IPI only to
   5.363 +	 * CPUs affected.
   5.364 +	 */
   5.365 +	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
   5.366 +
   5.367 +	while (!cpus_empty(flush_cpumask))
   5.368 +		/* nothing. lockup detection does not belong here */
   5.369 +		mb();
   5.370 +
   5.371 +	flush_mm = NULL;
   5.372 +	flush_va = 0;
   5.373 +	spin_unlock(&tlbstate_lock);
   5.374 +}
   5.375 +	
   5.376 +void flush_tlb_current_task(void)
   5.377 +{
   5.378 +	struct mm_struct *mm = current->mm;
   5.379 +	cpumask_t cpu_mask;
   5.380 +
   5.381 +	preempt_disable();
   5.382 +	cpu_mask = mm->cpu_vm_mask;
   5.383 +	cpu_clear(smp_processor_id(), cpu_mask);
   5.384 +
   5.385 +	local_flush_tlb();
   5.386 +	if (!cpus_empty(cpu_mask))
   5.387 +		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
   5.388 +	preempt_enable();
   5.389 +}
   5.390 +
   5.391 +void flush_tlb_mm (struct mm_struct * mm)
   5.392 +{
   5.393 +	cpumask_t cpu_mask;
   5.394 +
   5.395 +	preempt_disable();
   5.396 +	cpu_mask = mm->cpu_vm_mask;
   5.397 +	cpu_clear(smp_processor_id(), cpu_mask);
   5.398 +
   5.399 +	if (current->active_mm == mm) {
   5.400 +		if (current->mm)
   5.401 +			local_flush_tlb();
   5.402 +		else
   5.403 +			leave_mm(smp_processor_id());
   5.404 +	}
   5.405 +	if (!cpus_empty(cpu_mask))
   5.406 +		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
   5.407 +
   5.408 +	preempt_enable();
   5.409 +}
   5.410 +
   5.411 +void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
   5.412 +{
   5.413 +	struct mm_struct *mm = vma->vm_mm;
   5.414 +	cpumask_t cpu_mask;
   5.415 +
   5.416 +	preempt_disable();
   5.417 +	cpu_mask = mm->cpu_vm_mask;
   5.418 +	cpu_clear(smp_processor_id(), cpu_mask);
   5.419 +
   5.420 +	if (current->active_mm == mm) {
   5.421 +		if(current->mm)
   5.422 +			__flush_tlb_one(va);
   5.423 +		else
   5.424 +		 	leave_mm(smp_processor_id());
   5.425 +	}
   5.426 +
   5.427 +	if (!cpus_empty(cpu_mask))
   5.428 +		flush_tlb_others(cpu_mask, mm, va);
   5.429 +
   5.430 +	preempt_enable();
   5.431 +}
   5.432 +
   5.433 +static void do_flush_tlb_all(void* info)
   5.434 +{
   5.435 +	unsigned long cpu = smp_processor_id();
   5.436 +
   5.437 +	__flush_tlb_all();
   5.438 +	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
   5.439 +		leave_mm(cpu);
   5.440 +}
   5.441 +
   5.442 +void flush_tlb_all(void)
   5.443 +{
   5.444 +	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
   5.445 +}
   5.446 +
   5.447 +/*
   5.448 + * this function sends a 'reschedule' IPI to another CPU.
   5.449 + * it goes straight through and wastes no time serializing
   5.450 + * anything. Worst case is that we lose a reschedule ...
   5.451 + */
   5.452 +void smp_send_reschedule(int cpu)
   5.453 +{
   5.454 +	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
   5.455 +}
   5.456 +
   5.457 +/*
   5.458 + * Structure and data for smp_call_function(). This is designed to minimise
   5.459 + * static memory requirements. It also looks cleaner.
   5.460 + */
   5.461 +static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
   5.462 +
   5.463 +struct call_data_struct {
   5.464 +	void (*func) (void *info);
   5.465 +	void *info;
   5.466 +	atomic_t started;
   5.467 +	atomic_t finished;
   5.468 +	int wait;
   5.469 +};
   5.470 +
   5.471 +static struct call_data_struct * call_data;
   5.472 +
   5.473 +/*
   5.474 + * this function sends a 'generic call function' IPI to all other CPUs
   5.475 + * in the system.
   5.476 + */
   5.477 +
   5.478 +int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
   5.479 +			int wait)
   5.480 +/*
   5.481 + * [SUMMARY] Run a function on all other CPUs.
   5.482 + * <func> The function to run. This must be fast and non-blocking.
   5.483 + * <info> An arbitrary pointer to pass to the function.
   5.484 + * <nonatomic> currently unused.
   5.485 + * <wait> If true, wait (atomically) until function has completed on other CPUs.
   5.486 + * [RETURNS] 0 on success, else a negative status code. Does not return until
    5.487 + * remote CPUs are nearly ready to execute <<func>> or have already executed it.
   5.488 + *
   5.489 + * You must not call this function with disabled interrupts or from a
   5.490 + * hardware interrupt handler or from a bottom half handler.
   5.491 + */
   5.492 +{
   5.493 +	struct call_data_struct data;
   5.494 +	int cpus = num_online_cpus()-1;
   5.495 +
   5.496 +	if (!cpus)
   5.497 +		return 0;
   5.498 +
   5.499 +	/* Can deadlock when called with interrupts disabled */
   5.500 +	WARN_ON(irqs_disabled());
   5.501 +
   5.502 +	data.func = func;
   5.503 +	data.info = info;
   5.504 +	atomic_set(&data.started, 0);
   5.505 +	data.wait = wait;
   5.506 +	if (wait)
   5.507 +		atomic_set(&data.finished, 0);
   5.508 +
   5.509 +	spin_lock(&call_lock);
   5.510 +	call_data = &data;
   5.511 +	mb();
   5.512 +	
   5.513 +	/* Send a message to all other CPUs and wait for them to respond */
   5.514 +	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
   5.515 +
   5.516 +	/* Wait for response */
   5.517 +	while (atomic_read(&data.started) != cpus)
   5.518 +		barrier();
   5.519 +
   5.520 +	if (wait)
   5.521 +		while (atomic_read(&data.finished) != cpus)
   5.522 +			barrier();
   5.523 +	spin_unlock(&call_lock);
   5.524 +
   5.525 +	return 0;
   5.526 +}
   5.527 +
   5.528 +static void stop_this_cpu (void * dummy)
   5.529 +{
   5.530 +	/*
   5.531 +	 * Remove this CPU:
   5.532 +	 */
   5.533 +	cpu_clear(smp_processor_id(), cpu_online_map);
   5.534 +	local_irq_disable();
   5.535 +#if 1
   5.536 +	xxprint("stop_this_cpu disable_local_APIC\n");
   5.537 +#else
   5.538 +	disable_local_APIC();
   5.539 +#endif
   5.540 +	if (cpu_data[smp_processor_id()].hlt_works_ok)
   5.541 +		for(;;) __asm__("hlt");
   5.542 +	for (;;);
   5.543 +}
   5.544 +
   5.545 +/*
   5.546 + * this function calls the 'stop' function on all other CPUs in the system.
   5.547 + */
   5.548 +
   5.549 +void smp_send_stop(void)
   5.550 +{
   5.551 +	smp_call_function(stop_this_cpu, NULL, 1, 0);
   5.552 +
   5.553 +	local_irq_disable();
   5.554 +#if 1
   5.555 +	xxprint("smp_send_stop disable_local_APIC\n");
   5.556 +#else
   5.557 +	disable_local_APIC();
   5.558 +#endif
   5.559 +	local_irq_enable();
   5.560 +}
   5.561 +
   5.562 +/*
    5.563 + * Reschedule callback. Nothing to do;
   5.564 + * all the work is done automatically when
   5.565 + * we return from the interrupt.
   5.566 + */
   5.567 +irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
   5.568 +				     struct pt_regs *regs)
   5.569 +{
   5.570 +
   5.571 +	return IRQ_HANDLED;
   5.572 +}
   5.573 +
   5.574 +#include <linux/kallsyms.h>
   5.575 +irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
   5.576 +					struct pt_regs *regs)
   5.577 +{
   5.578 +	void (*func) (void *info) = call_data->func;
   5.579 +	void *info = call_data->info;
   5.580 +	int wait = call_data->wait;
   5.581 +
   5.582 +	/*
   5.583 +	 * Notify initiating CPU that I've grabbed the data and am
   5.584 +	 * about to execute the function
   5.585 +	 */
   5.586 +	mb();
   5.587 +	atomic_inc(&call_data->started);
   5.588 +	/*
   5.589 +	 * At this point the info structure may be out of scope unless wait==1
   5.590 +	 */
   5.591 +	irq_enter();
   5.592 +	(*func)(info);
   5.593 +	irq_exit();
   5.594 +
   5.595 +	if (wait) {
   5.596 +		mb();
   5.597 +		atomic_inc(&call_data->finished);
   5.598 +	}
   5.599 +
   5.600 +	return IRQ_HANDLED;
   5.601 +}
   5.602 +
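
The smp_call_function() path in this file is a compact rendezvous protocol: the initiator publishes a call_data record behind call_lock, sends the CALL_FUNCTION_VECTOR IPI, and spins until every other CPU has bumped `started` (and `finished`, when wait is set); smp_call_function_interrupt() at the bottom of the file is the other half. A self-contained userspace sketch of the same counter protocol, with POSIX threads standing in for CPUs and an atomic flag standing in for the IPI — all of the names below are illustrative:

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define NCPUS 4			/* pretend CPU count for the example */

	struct call_data {
		void (*func)(void *info);
		void *info;
		atomic_int started;
		atomic_int finished;
		int wait;
	};

	static struct call_data *call_data;	/* published record */
	static atomic_int kick;			/* stands in for the IPI */

	static void *other_cpu(void *arg)
	{
		while (!atomic_load(&kick))
			;			/* wait for the "IPI" */
		struct call_data *d = call_data;
		atomic_fetch_add(&d->started, 1);	/* I've grabbed the data */
		d->func(d->info);			/* run the function */
		if (d->wait)
			atomic_fetch_add(&d->finished, 1);
		return NULL;
	}

	static void say(void *info)
	{
		puts(info);
	}

	int main(void)
	{
		pthread_t t[NCPUS - 1];
		struct call_data d = { say, "hello from a remote cpu", 0, 0, 1 };

		call_data = &d;			/* cf. call_data = &data; mb(); */
		for (int i = 0; i < NCPUS - 1; i++)
			pthread_create(&t[i], NULL, other_cpu, NULL);
		atomic_store(&kick, 1);		/* cf. send_IPI_allbutself() */

		while (atomic_load(&d.started) != NCPUS - 1)
			;			/* wait for response */
		while (atomic_load(&d.finished) != NCPUS - 1)
			;			/* wait == 1: wait for completion */
		for (int i = 0; i < NCPUS - 1; i++)
			pthread_join(t[i], NULL);
		return 0;
	}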
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/smpboot.c	Mon Dec 27 13:35:05 2004 +0000
     6.3 @@ -0,0 +1,1364 @@
     6.4 +/*
     6.5 + *	x86 SMP booting functions
     6.6 + *
     6.7 + *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
     6.8 + *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
     6.9 + *
    6.10 + *	Much of the core SMP work is based on previous work by Thomas Radke, to
    6.11 + *	whom a great many thanks are extended.
    6.12 + *
    6.13 + *	Thanks to Intel for making available several different Pentium,
    6.14 + *	Pentium Pro and Pentium-II/Xeon MP machines.
    6.15 + *	Original development of Linux SMP code supported by Caldera.
    6.16 + *
    6.17 + *	This code is released under the GNU General Public License version 2 or
    6.18 + *	later.
    6.19 + *
    6.20 + *	Fixes
    6.21 + *		Felix Koop	:	NR_CPUS used properly
    6.22 + *		Jose Renau	:	Handle single CPU case.
    6.23 + *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
    6.24 + *		Greg Wright	:	Fix for kernel stacks panic.
    6.25 + *		Erich Boleyn	:	MP v1.4 and additional changes.
    6.26 + *	Matthias Sattler	:	Changes for 2.1 kernel map.
    6.27 + *	Michel Lespinasse	:	Changes for 2.1 kernel map.
    6.28 + *	Michael Chastain	:	Change trampoline.S to gnu as.
    6.29 + *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
    6.30 + *		Ingo Molnar	:	Added APIC timers, based on code
    6.31 + *					from Jose Renau
    6.32 + *		Ingo Molnar	:	various cleanups and rewrites
    6.33 + *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
    6.34 + *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
    6.35 + *		Martin J. Bligh	: 	Added support for multi-quad systems
    6.36 + *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
     6.37 + *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */
    6.38 +
    6.39 +#include <linux/module.h>
    6.40 +#include <linux/config.h>
    6.41 +#include <linux/init.h>
    6.42 +#include <linux/kernel.h>
    6.43 +
    6.44 +#include <linux/mm.h>
    6.45 +#include <linux/sched.h>
    6.46 +#include <linux/kernel_stat.h>
    6.47 +#include <linux/smp_lock.h>
    6.48 +#include <linux/irq.h>
    6.49 +#include <linux/bootmem.h>
    6.50 +
    6.51 +#include <linux/delay.h>
    6.52 +#include <linux/mc146818rtc.h>
    6.53 +#include <asm/tlbflush.h>
    6.54 +#include <asm/desc.h>
    6.55 +#include <asm/arch_hooks.h>
    6.56 +
    6.57 +#if 1
    6.58 +#define Dprintk(args...)
    6.59 +#else
    6.60 +#include <mach_apic.h>
    6.61 +#endif
    6.62 +#include <mach_wakecpu.h>
    6.63 +#include <smpboot_hooks.h>
    6.64 +
    6.65 +/* Set if we find a B stepping CPU */
    6.66 +static int __initdata smp_b_stepping;
    6.67 +
    6.68 +/* Number of siblings per CPU package */
    6.69 +int smp_num_siblings = 1;
    6.70 +int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
    6.71 +
    6.72 +/* bitmap of online cpus */
    6.73 +cpumask_t cpu_online_map;
    6.74 +
    6.75 +static cpumask_t cpu_callin_map;
    6.76 +cpumask_t cpu_callout_map;
    6.77 +static cpumask_t smp_commenced_mask;
    6.78 +
    6.79 +/* Per CPU bogomips and other parameters */
    6.80 +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
    6.81 +
    6.82 +u8 x86_cpu_to_apicid[NR_CPUS] =
    6.83 +			{ [0 ... NR_CPUS-1] = 0xff };
    6.84 +EXPORT_SYMBOL(x86_cpu_to_apicid);
    6.85 +
    6.86 +/* Set when the idlers are all forked */
    6.87 +int smp_threads_ready;
    6.88 +
    6.89 +#if 0
    6.90 +/*
    6.91 + * Trampoline 80x86 program as an array.
    6.92 + */
    6.93 +
    6.94 +extern unsigned char trampoline_data [];
    6.95 +extern unsigned char trampoline_end  [];
    6.96 +static unsigned char *trampoline_base;
    6.97 +static int trampoline_exec;
    6.98 +
    6.99 +/*
   6.100 + * Currently trivial. Write the real->protected mode
   6.101 + * bootstrap into the page concerned. The caller
   6.102 + * has made sure it's suitably aligned.
   6.103 + */
   6.104 +
   6.105 +static unsigned long __init setup_trampoline(void)
   6.106 +{
   6.107 +	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
   6.108 +	return virt_to_phys(trampoline_base);
   6.109 +}
   6.110 +#endif
   6.111 +
   6.112 +/*
   6.113 + * We are called very early to get the low memory for the
   6.114 + * SMP bootup trampoline page.
   6.115 + */
   6.116 +void __init smp_alloc_memory(void)
   6.117 +{
   6.118 +#if 1
   6.119 +	int cpu;
   6.120 +
   6.121 +	for (cpu = 1; cpu < NR_CPUS; cpu++) {
   6.122 +		cpu_gdt_descr[cpu].address = (unsigned long)
   6.123 +			alloc_bootmem_low_pages(PAGE_SIZE);
   6.124 +		/* XXX free unused pages later */
   6.125 +	}
   6.126 +#else
   6.127 +	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
   6.128 +	/*
   6.129 +	 * Has to be in very low memory so we can execute
   6.130 +	 * real-mode AP code.
   6.131 +	 */
   6.132 +	if (__pa(trampoline_base) >= 0x9F000)
   6.133 +		BUG();
   6.134 +	/*
   6.135 +	 * Make the SMP trampoline executable:
   6.136 +	 */
   6.137 +	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
   6.138 +#endif
   6.139 +}
   6.140 +
   6.141 +/*
   6.142 + * The bootstrap kernel entry code has set these up. Save them for
    6.143 + * a given CPU.
   6.144 + */
   6.145 +
   6.146 +static void __init smp_store_cpu_info(int id)
   6.147 +{
   6.148 +	struct cpuinfo_x86 *c = cpu_data + id;
   6.149 +
   6.150 +	*c = boot_cpu_data;
   6.151 +	if (id!=0)
   6.152 +		identify_cpu(c);
   6.153 +	/*
   6.154 +	 * Mask B, Pentium, but not Pentium MMX
   6.155 +	 */
   6.156 +	if (c->x86_vendor == X86_VENDOR_INTEL &&
   6.157 +	    c->x86 == 5 &&
   6.158 +	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
   6.159 +	    c->x86_model <= 3)
   6.160 +		/*
   6.161 +		 * Remember we have B step Pentia with bugs
   6.162 +		 */
   6.163 +		smp_b_stepping = 1;
   6.164 +
   6.165 +	/*
   6.166 +	 * Certain Athlons might work (for various values of 'work') in SMP
   6.167 +	 * but they are not certified as MP capable.
   6.168 +	 */
   6.169 +	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
   6.170 +
   6.171 +		/* Athlon 660/661 is valid. */	
   6.172 +		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
   6.173 +			goto valid_k7;
   6.174 +
   6.175 +		/* Duron 670 is valid */
   6.176 +		if ((c->x86_model==7) && (c->x86_mask==0))
   6.177 +			goto valid_k7;
   6.178 +
   6.179 +		/*
    6.180 +		 * Athlon 662, Duron 671, and Athlon >model 7 have the capability bit.
    6.181 +		 * It's worth noting that some A5-stepping (662) Athlon XPs
    6.182 +		 * have the MP bit set.
   6.183 +		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
   6.184 +		 */
   6.185 +		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
   6.186 +		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
   6.187 +		     (c->x86_model> 7))
   6.188 +			if (cpu_has_mp)
   6.189 +				goto valid_k7;
   6.190 +
   6.191 +		/* If we get here, it's not a certified SMP capable AMD system. */
   6.192 +		tainted |= TAINT_UNSAFE_SMP;
   6.193 +	}
   6.194 +
   6.195 +valid_k7:
   6.196 +	;
   6.197 +}
   6.198 +
   6.199 +#if 0
   6.200 +/*
   6.201 + * TSC synchronization.
   6.202 + *
   6.203 + * We first check whether all CPUs have their TSC's synchronized,
   6.204 + * then we print a warning if not, and always resync.
   6.205 + */
   6.206 +
   6.207 +static atomic_t tsc_start_flag = ATOMIC_INIT(0);
   6.208 +static atomic_t tsc_count_start = ATOMIC_INIT(0);
   6.209 +static atomic_t tsc_count_stop = ATOMIC_INIT(0);
   6.210 +static unsigned long long tsc_values[NR_CPUS];
   6.211 +
   6.212 +#define NR_LOOPS 5
   6.213 +
   6.214 +static void __init synchronize_tsc_bp (void)
   6.215 +{
   6.216 +	int i;
   6.217 +	unsigned long long t0;
   6.218 +	unsigned long long sum, avg;
   6.219 +	long long delta;
   6.220 +	unsigned long one_usec;
   6.221 +	int buggy = 0;
   6.222 +
   6.223 +	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
   6.224 +
   6.225 +	/* convert from kcyc/sec to cyc/usec */
   6.226 +	one_usec = cpu_khz / 1000;
   6.227 +
   6.228 +	atomic_set(&tsc_start_flag, 1);
   6.229 +	wmb();
   6.230 +
   6.231 +	/*
   6.232 +	 * We loop a few times to get a primed instruction cache,
   6.233 +	 * then the last pass is more or less synchronized and
   6.234 +	 * the BP and APs set their cycle counters to zero all at
   6.235 +	 * once. This reduces the chance of having random offsets
   6.236 +	 * between the processors, and guarantees that the maximum
   6.237 +	 * delay between the cycle counters is never bigger than
   6.238 +	 * the latency of information-passing (cachelines) between
   6.239 +	 * two CPUs.
   6.240 +	 */
   6.241 +	for (i = 0; i < NR_LOOPS; i++) {
   6.242 +		/*
   6.243 +		 * all APs synchronize but they loop on '== num_cpus'
   6.244 +		 */
   6.245 +		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
   6.246 +			mb();
   6.247 +		atomic_set(&tsc_count_stop, 0);
   6.248 +		wmb();
   6.249 +		/*
   6.250 +		 * this lets the APs save their current TSC:
   6.251 +		 */
   6.252 +		atomic_inc(&tsc_count_start);
   6.253 +
   6.254 +		rdtscll(tsc_values[smp_processor_id()]);
   6.255 +		/*
   6.256 +		 * We clear the TSC in the last loop:
   6.257 +		 */
   6.258 +		if (i == NR_LOOPS-1)
   6.259 +			write_tsc(0, 0);
   6.260 +
   6.261 +		/*
   6.262 +		 * Wait for all APs to leave the synchronization point:
   6.263 +		 */
   6.264 +		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
   6.265 +			mb();
   6.266 +		atomic_set(&tsc_count_start, 0);
   6.267 +		wmb();
   6.268 +		atomic_inc(&tsc_count_stop);
   6.269 +	}
   6.270 +
   6.271 +	sum = 0;
   6.272 +	for (i = 0; i < NR_CPUS; i++) {
   6.273 +		if (cpu_isset(i, cpu_callout_map)) {
   6.274 +			t0 = tsc_values[i];
   6.275 +			sum += t0;
   6.276 +		}
   6.277 +	}
   6.278 +	avg = sum;
   6.279 +	do_div(avg, num_booting_cpus());
   6.280 +
   6.281 +	sum = 0;
   6.282 +	for (i = 0; i < NR_CPUS; i++) {
   6.283 +		if (!cpu_isset(i, cpu_callout_map))
   6.284 +			continue;
   6.285 +		delta = tsc_values[i] - avg;
   6.286 +		if (delta < 0)
   6.287 +			delta = -delta;
   6.288 +		/*
    6.289 +		 * We report clock differences bigger than 2 microseconds.
   6.290 +		 */
   6.291 +		if (delta > 2*one_usec) {
   6.292 +			long realdelta;
   6.293 +			if (!buggy) {
   6.294 +				buggy = 1;
   6.295 +				printk("\n");
   6.296 +			}
   6.297 +			realdelta = delta;
   6.298 +			do_div(realdelta, one_usec);
   6.299 +			if (tsc_values[i] < avg)
   6.300 +				realdelta = -realdelta;
   6.301 +
   6.302 +			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
   6.303 +		}
   6.304 +
   6.305 +		sum += delta;
   6.306 +	}
   6.307 +	if (!buggy)
   6.308 +		printk("passed.\n");
   6.309 +}
   6.310 +
   6.311 +static void __init synchronize_tsc_ap (void)
   6.312 +{
   6.313 +	int i;
   6.314 +
   6.315 +	/*
   6.316 +	 * Not every cpu is online at the time
   6.317 +	 * this gets called, so we first wait for the BP to
   6.318 +	 * finish SMP initialization:
   6.319 +	 */
   6.320 +	while (!atomic_read(&tsc_start_flag)) mb();
   6.321 +
   6.322 +	for (i = 0; i < NR_LOOPS; i++) {
   6.323 +		atomic_inc(&tsc_count_start);
   6.324 +		while (atomic_read(&tsc_count_start) != num_booting_cpus())
   6.325 +			mb();
   6.326 +
   6.327 +		rdtscll(tsc_values[smp_processor_id()]);
   6.328 +		if (i == NR_LOOPS-1)
   6.329 +			write_tsc(0, 0);
   6.330 +
   6.331 +		atomic_inc(&tsc_count_stop);
   6.332 +		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
   6.333 +	}
   6.334 +}
   6.335 +#undef NR_LOOPS
   6.336 +#endif
   6.337 +
   6.338 +extern void calibrate_delay(void);
   6.339 +
   6.340 +static atomic_t init_deasserted;
   6.341 +
   6.342 +void __init smp_callin(void)
   6.343 +{
   6.344 +	int cpuid, phys_id;
   6.345 +	unsigned long timeout;
   6.346 +
   6.347 +#if 0
   6.348 +	/*
    6.349 +	 * If woken up by an INIT in an 82489DX configuration
   6.350 +	 * we may get here before an INIT-deassert IPI reaches
   6.351 +	 * our local APIC.  We have to wait for the IPI or we'll
   6.352 +	 * lock up on an APIC access.
   6.353 +	 */
   6.354 +	wait_for_init_deassert(&init_deasserted);
   6.355 +#endif
   6.356 +
   6.357 +	/*
   6.358 +	 * (This works even if the APIC is not enabled.)
   6.359 +	 */
   6.360 +	phys_id = smp_processor_id();
   6.361 +	cpuid = smp_processor_id();
   6.362 +	if (cpu_isset(cpuid, cpu_callin_map)) {
   6.363 +		printk("huh, phys CPU#%d, CPU#%d already present??\n",
   6.364 +					phys_id, cpuid);
   6.365 +		BUG();
   6.366 +	}
   6.367 +	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
   6.368 +
   6.369 +	/*
   6.370 +	 * STARTUP IPIs are fragile beasts as they might sometimes
    6.371 +	 * trigger some glue motherboard logic. Complete APIC bus
    6.372 +	 * silence for 1 second; this overestimates the time the
    6.373 +	 * boot CPU spends sending the up to 2 STARTUP IPIs
   6.374 +	 * by a factor of two. This should be enough.
   6.375 +	 */
   6.376 +
   6.377 +	/*
   6.378 +	 * Waiting 2s total for startup (udelay is not yet working)
   6.379 +	 */
   6.380 +	timeout = jiffies + 2*HZ;
   6.381 +	while (time_before(jiffies, timeout)) {
   6.382 +		/*
    6.383 +		 * Has the boot CPU finished its STARTUP sequence?
   6.384 +		 */
   6.385 +		if (cpu_isset(cpuid, cpu_callout_map))
   6.386 +			break;
   6.387 +		rep_nop();
   6.388 +	}
   6.389 +
   6.390 +	if (!time_before(jiffies, timeout)) {
   6.391 +		printk("BUG: CPU%d started up but did not get a callout!\n",
   6.392 +			cpuid);
   6.393 +		BUG();
   6.394 +	}
   6.395 +
   6.396 +#if 0
   6.397 +	/*
   6.398 +	 * the boot CPU has finished the init stage and is spinning
   6.399 +	 * on callin_map until we finish. We are free to set up this
   6.400 +	 * CPU, first the APIC. (this is probably redundant on most
   6.401 +	 * boards)
   6.402 +	 */
   6.403 +
   6.404 +	Dprintk("CALLIN, before setup_local_APIC().\n");
   6.405 +	smp_callin_clear_local_apic();
   6.406 +	setup_local_APIC();
   6.407 +#endif
   6.408 +	map_cpu_to_logical_apicid();
   6.409 +
   6.410 +	local_irq_enable();
   6.411 +
   6.412 +	/*
   6.413 +	 * Get our bogomips.
   6.414 +	 */
   6.415 +	calibrate_delay();
   6.416 +	Dprintk("Stack at about %p\n",&cpuid);
   6.417 +
   6.418 +	/*
   6.419 +	 * Save our processor parameters
   6.420 +	 */
   6.421 + 	smp_store_cpu_info(cpuid);
   6.422 +
   6.423 +#if 0
   6.424 +	disable_APIC_timer();
   6.425 +#endif
   6.426 +	local_irq_disable();
   6.427 +	/*
   6.428 +	 * Allow the master to continue.
   6.429 +	 */
   6.430 +	cpu_set(cpuid, cpu_callin_map);
   6.431 +
   6.432 +#if 0
   6.433 +	/*
   6.434 +	 *      Synchronize the TSC with the BP
   6.435 +	 */
   6.436 +	if (cpu_has_tsc && cpu_khz)
   6.437 +		synchronize_tsc_ap();
   6.438 +#endif
   6.439 +}
   6.440 +
   6.441 +int cpucount;
   6.442 +
   6.443 +extern int cpu_idle(void);
   6.444 +
   6.445 +
   6.446 +static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
   6.447 +					 struct pt_regs *regs)
   6.448 +{
   6.449 +
   6.450 +	return IRQ_HANDLED;
   6.451 +}
   6.452 +
   6.453 +static struct irqaction local_irq_debug = {
   6.454 +	local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
   6.455 +	NULL, NULL
   6.456 +};
   6.457 +
   6.458 +void local_setup_debug(void)
   6.459 +{
   6.460 +	(void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
   6.461 +}
   6.462 +
   6.463 +
   6.464 +extern void local_setup_timer(void);
   6.465 +
   6.466 +/*
   6.467 + * Activate a secondary processor.
   6.468 + */
   6.469 +int __init start_secondary(void *unused)
   6.470 +{
   6.471 +	/*
    6.472 +	 * Don't put anything before smp_callin(); SMP
    6.473 +	 * booting is so fragile that we want to limit the
    6.474 +	 * work done here to the bare minimum.
   6.475 +	 */
   6.476 +	cpu_init();
   6.477 +	smp_callin();
   6.478 +	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
   6.479 +		rep_nop();
   6.480 +	local_setup_timer();
   6.481 +	local_setup_debug();	/* XXX */
   6.482 +	smp_intr_init();
   6.483 +	local_irq_enable();
   6.484 +	/*
   6.485 +	 * low-memory mappings have been cleared, flush them from
   6.486 +	 * the local TLBs too.
   6.487 +	 */
   6.488 +	local_flush_tlb();
   6.489 +	cpu_set(smp_processor_id(), cpu_online_map);
   6.490 +	wmb();
   6.491 +	if (0) {
   6.492 +		char *msg2 = "delay2\n";
   6.493 +		int timeout;
   6.494 +		for (timeout = 0; timeout < 50000; timeout++) {
   6.495 +			udelay(1000);
   6.496 +			if (timeout == 2000) {
   6.497 +				(void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
   6.498 +				timeout = 0;
   6.499 +			}
   6.500 +		}
   6.501 +	}
   6.502 +	return cpu_idle();
   6.503 +}
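          +
          +/*
          + * Note the ordering above: the AP does not bind its per-vcpu timer
          + * and IPI event channels until __cpu_up() has set smp_commenced_mask,
          + * and it only marks itself in cpu_online_map once interrupts are
          + * live -- which is exactly what __cpu_up() spins on before returning.
          + */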
   6.504 +
   6.505 +/*
   6.506 + * Everything has been set up for the secondary
   6.507 + * CPUs - they just need to reload everything
    6.508 + * from the task structure.
   6.509 + * This function must not return.
   6.510 + */
   6.511 +void __init initialize_secondary(void)
   6.512 +{
   6.513 +	/*
   6.514 +	 * We don't actually need to load the full TSS,
   6.515 +	 * basically just the stack pointer and the eip.
   6.516 +	 */
   6.517 +
   6.518 +	asm volatile(
   6.519 +		"movl %0,%%esp\n\t"
   6.520 +		"jmp *%1"
   6.521 +		:
   6.522 +		:"r" (current->thread.esp),"r" (current->thread.eip));
   6.523 +}
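          +
          +/*
          + * The asm above just switches to the idle task's saved kernel stack
          + * (thread.esp) and jumps to thread.eip -- which do_boot_cpu() pointed
          + * at start_secondary -- so control never comes back here; roughly,
          + * "esp = current->thread.esp; goto *current->thread.eip;".
          + */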
   6.524 +
   6.525 +extern struct {
   6.526 +	void * esp;
   6.527 +	unsigned short ss;
   6.528 +} stack_start;
   6.529 +
   6.530 +#ifdef CONFIG_NUMA
   6.531 +
   6.532 +/* which logical CPUs are on which nodes */
   6.533 +cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
   6.534 +				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
   6.535 +/* which node each logical CPU is on */
   6.536 +int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
   6.537 +EXPORT_SYMBOL(cpu_2_node);
   6.538 +
   6.539 +/* set up a mapping between cpu and node. */
   6.540 +static inline void map_cpu_to_node(int cpu, int node)
   6.541 +{
   6.542 +	printk("Mapping cpu %d to node %d\n", cpu, node);
   6.543 +	cpu_set(cpu, node_2_cpu_mask[node]);
   6.544 +	cpu_2_node[cpu] = node;
   6.545 +}
   6.546 +
   6.547 +/* undo a mapping between cpu and node. */
   6.548 +static inline void unmap_cpu_to_node(int cpu)
   6.549 +{
   6.550 +	int node;
   6.551 +
   6.552 +	printk("Unmapping cpu %d from all nodes\n", cpu);
   6.553 +	for (node = 0; node < MAX_NUMNODES; node ++)
   6.554 +		cpu_clear(cpu, node_2_cpu_mask[node]);
   6.555 +	cpu_2_node[cpu] = 0;
   6.556 +}
   6.557 +#else /* !CONFIG_NUMA */
   6.558 +
   6.559 +#define map_cpu_to_node(cpu, node)	({})
   6.560 +#define unmap_cpu_to_node(cpu)	({})
   6.561 +
   6.562 +#endif /* CONFIG_NUMA */
   6.563 +
   6.564 +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   6.565 +
   6.566 +void map_cpu_to_logical_apicid(void)
   6.567 +{
   6.568 +	int cpu = smp_processor_id();
   6.569 +	int apicid = smp_processor_id();
   6.570 +
   6.571 +	cpu_2_logical_apicid[cpu] = apicid;
   6.572 +	map_cpu_to_node(cpu, apicid_to_node(apicid));
   6.573 +}
   6.574 +
   6.575 +void unmap_cpu_to_logical_apicid(int cpu)
   6.576 +{
   6.577 +	cpu_2_logical_apicid[cpu] = BAD_APICID;
   6.578 +	unmap_cpu_to_node(cpu);
   6.579 +}
   6.580 +
   6.581 +#if APIC_DEBUG
   6.582 +static inline void __inquire_remote_apic(int apicid)
   6.583 +{
   6.584 +	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
   6.585 +	char *names[] = { "ID", "VERSION", "SPIV" };
   6.586 +	int timeout, status;
   6.587 +
   6.588 +	printk("Inquiring remote APIC #%d...\n", apicid);
   6.589 +
   6.590 +	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
   6.591 +		printk("... APIC #%d %s: ", apicid, names[i]);
   6.592 +
   6.593 +		/*
   6.594 +		 * Wait for idle.
   6.595 +		 */
   6.596 +		apic_wait_icr_idle();
   6.597 +
   6.598 +		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
   6.599 +		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
   6.600 +
   6.601 +		timeout = 0;
   6.602 +		do {
   6.603 +			udelay(100);
   6.604 +			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
   6.605 +		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
   6.606 +
   6.607 +		switch (status) {
   6.608 +		case APIC_ICR_RR_VALID:
   6.609 +			status = apic_read(APIC_RRR);
   6.610 +			printk("%08x\n", status);
   6.611 +			break;
   6.612 +		default:
   6.613 +			printk("failed\n");
   6.614 +		}
   6.615 +	}
   6.616 +}
   6.617 +#endif
   6.618 +
   6.619 +#if 0
   6.620 +#ifdef WAKE_SECONDARY_VIA_NMI
   6.621 +/* 
   6.622 + * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
   6.623 + * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
   6.624 + * won't ... remember to clear down the APIC, etc later.
   6.625 + */
   6.626 +static int __init
   6.627 +wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
   6.628 +{
   6.629 +	unsigned long send_status = 0, accept_status = 0;
   6.630 +	int timeout, maxlvt;
   6.631 +
   6.632 +	/* Target chip */
   6.633 +	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
   6.634 +
   6.635 +	/* Boot on the stack */
   6.636 +	/* Kick the second */
   6.637 +	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
   6.638 +
   6.639 +	Dprintk("Waiting for send to finish...\n");
   6.640 +	timeout = 0;
   6.641 +	do {
   6.642 +		Dprintk("+");
   6.643 +		udelay(100);
   6.644 +		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
   6.645 +	} while (send_status && (timeout++ < 1000));
   6.646 +
   6.647 +	/*
   6.648 +	 * Give the other CPU some time to accept the IPI.
   6.649 +	 */
   6.650 +	udelay(200);
   6.651 +	/*
   6.652 +	 * Due to the Pentium erratum 3AP.
   6.653 +	 */
   6.654 +	maxlvt = get_maxlvt();
   6.655 +	if (maxlvt > 3) {
   6.656 +		apic_read_around(APIC_SPIV);
   6.657 +		apic_write(APIC_ESR, 0);
   6.658 +	}
   6.659 +	accept_status = (apic_read(APIC_ESR) & 0xEF);
   6.660 +	Dprintk("NMI sent.\n");
   6.661 +
   6.662 +	if (send_status)
   6.663 +		printk("APIC never delivered???\n");
   6.664 +	if (accept_status)
   6.665 +		printk("APIC delivery error (%lx).\n", accept_status);
   6.666 +
   6.667 +	return (send_status | accept_status);
   6.668 +}
   6.669 +#endif	/* WAKE_SECONDARY_VIA_NMI */
   6.670 +
   6.671 +#ifdef WAKE_SECONDARY_VIA_INIT
   6.672 +static int __init
   6.673 +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
   6.674 +{
   6.675 +	unsigned long send_status = 0, accept_status = 0;
   6.676 +	int maxlvt, timeout, num_starts, j;
   6.677 +
   6.678 +	/*
   6.679 +	 * Be paranoid about clearing APIC errors.
   6.680 +	 */
   6.681 +	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
   6.682 +		apic_read_around(APIC_SPIV);
   6.683 +		apic_write(APIC_ESR, 0);
   6.684 +		apic_read(APIC_ESR);
   6.685 +	}
   6.686 +
   6.687 +	Dprintk("Asserting INIT.\n");
   6.688 +
   6.689 +	/*
   6.690 +	 * Turn INIT on target chip
   6.691 +	 */
   6.692 +	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
   6.693 +
   6.694 +	/*
   6.695 +	 * Send IPI
   6.696 +	 */
   6.697 +	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
   6.698 +				| APIC_DM_INIT);
   6.699 +
   6.700 +	Dprintk("Waiting for send to finish...\n");
   6.701 +	timeout = 0;
   6.702 +	do {
   6.703 +		Dprintk("+");
   6.704 +		udelay(100);
   6.705 +		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
   6.706 +	} while (send_status && (timeout++ < 1000));
   6.707 +
   6.708 +	mdelay(10);
   6.709 +
   6.710 +	Dprintk("Deasserting INIT.\n");
   6.711 +
   6.712 +	/* Target chip */
   6.713 +	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
   6.714 +
   6.715 +	/* Send IPI */
   6.716 +	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
   6.717 +
   6.718 +	Dprintk("Waiting for send to finish...\n");
   6.719 +	timeout = 0;
   6.720 +	do {
   6.721 +		Dprintk("+");
   6.722 +		udelay(100);
   6.723 +		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
   6.724 +	} while (send_status && (timeout++ < 1000));
   6.725 +
   6.726 +	atomic_set(&init_deasserted, 1);
   6.727 +
   6.728 +	/*
   6.729 +	 * Should we send STARTUP IPIs ?
   6.730 +	 *
   6.731 +	 * Determine this based on the APIC version.
   6.732 +	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
   6.733 +	 */
   6.734 +	if (APIC_INTEGRATED(apic_version[phys_apicid]))
   6.735 +		num_starts = 2;
   6.736 +	else
   6.737 +		num_starts = 0;
   6.738 +
   6.739 +	/*
   6.740 +	 * Run STARTUP IPI loop.
   6.741 +	 */
   6.742 +	Dprintk("#startup loops: %d.\n", num_starts);
   6.743 +
   6.744 +	maxlvt = get_maxlvt();
   6.745 +
   6.746 +	for (j = 1; j <= num_starts; j++) {
    6.747 +		Dprintk("Sending STARTUP #%d.\n", j);
   6.748 +		apic_read_around(APIC_SPIV);
   6.749 +		apic_write(APIC_ESR, 0);
   6.750 +		apic_read(APIC_ESR);
   6.751 +		Dprintk("After apic_write.\n");
   6.752 +
   6.753 +		/*
   6.754 +		 * STARTUP IPI
   6.755 +		 */
   6.756 +
   6.757 +		/* Target chip */
   6.758 +		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
   6.759 +
   6.760 +		/* Boot on the stack */
   6.761 +		/* Kick the second */
   6.762 +		apic_write_around(APIC_ICR, APIC_DM_STARTUP
   6.763 +					| (start_eip >> 12));
   6.764 +
   6.765 +		/*
   6.766 +		 * Give the other CPU some time to accept the IPI.
   6.767 +		 */
   6.768 +		udelay(300);
   6.769 +
   6.770 +		Dprintk("Startup point 1.\n");
   6.771 +
   6.772 +		Dprintk("Waiting for send to finish...\n");
   6.773 +		timeout = 0;
   6.774 +		do {
   6.775 +			Dprintk("+");
   6.776 +			udelay(100);
   6.777 +			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
   6.778 +		} while (send_status && (timeout++ < 1000));
   6.779 +
   6.780 +		/*
   6.781 +		 * Give the other CPU some time to accept the IPI.
   6.782 +		 */
   6.783 +		udelay(200);
   6.784 +		/*
   6.785 +		 * Due to the Pentium erratum 3AP.
   6.786 +		 */
   6.787 +		if (maxlvt > 3) {
   6.788 +			apic_read_around(APIC_SPIV);
   6.789 +			apic_write(APIC_ESR, 0);
   6.790 +		}
   6.791 +		accept_status = (apic_read(APIC_ESR) & 0xEF);
   6.792 +		if (send_status || accept_status)
   6.793 +			break;
   6.794 +	}
   6.795 +	Dprintk("After Startup.\n");
   6.796 +
   6.797 +	if (send_status)
   6.798 +		printk("APIC never delivered???\n");
   6.799 +	if (accept_status)
   6.800 +		printk("APIC delivery error (%lx).\n", accept_status);
   6.801 +
   6.802 +	return (send_status | accept_status);
   6.803 +}
   6.804 +#endif	/* WAKE_SECONDARY_VIA_INIT */
   6.805 +#endif
   6.806 +
   6.807 +extern cpumask_t cpu_initialized;
   6.808 +
   6.809 +static int __init do_boot_cpu(int apicid)
   6.810 +/*
   6.811 + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
   6.812 + * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
   6.813 + * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
   6.814 + */
   6.815 +{
   6.816 +	struct task_struct *idle;
   6.817 +	unsigned long boot_error;
   6.818 +	int timeout, cpu;
   6.819 +	unsigned long start_eip;
   6.820 +#if 0
   6.821 +	unsigned short nmi_high = 0, nmi_low = 0;
   6.822 +#endif
   6.823 +	full_execution_context_t ctxt;
   6.824 +	extern void startup_32_smp(void);
   6.825 +	extern void hypervisor_callback(void);
   6.826 +	extern void failsafe_callback(void);
   6.827 +	extern int smp_trap_init(trap_info_t *);
   6.828 +	int i;
   6.829 +
   6.830 +	cpu = ++cpucount;
   6.831 +	/*
    6.832 +	 * We can't use kernel_thread() since we must avoid
    6.833 +	 * rescheduling the child.
   6.834 +	 */
   6.835 +	idle = fork_idle(cpu);
   6.836 +	if (IS_ERR(idle))
   6.837 +		panic("failed fork for CPU %d", cpu);
   6.838 +	idle->thread.eip = (unsigned long) start_secondary;
   6.839 +	/* start_eip had better be page-aligned! */
   6.840 +	start_eip = (unsigned long)startup_32_smp;
   6.841 +
   6.842 +	/* So we see what's up   */
   6.843 +	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
   6.844 +	/* Stack for startup_32 can be just as for start_secondary onwards */
   6.845 +	stack_start.esp = (void *) idle->thread.esp;
   6.846 +
   6.847 +	irq_ctx_init(cpu);
   6.848 +
   6.849 +	/*
   6.850 +	 * This grunge runs the startup process for
   6.851 +	 * the targeted processor.
   6.852 +	 */
   6.853 +
   6.854 +	atomic_set(&init_deasserted, 0);
   6.855 +
   6.856 +#if 1
   6.857 +	if (cpu_gdt_descr[0].size > PAGE_SIZE)
   6.858 +		BUG();
   6.859 +	cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
   6.860 +	memcpy((void *)cpu_gdt_descr[cpu].address,
   6.861 +	       (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
    6.862 +	memset((char *)cpu_gdt_descr[cpu].address +
    6.863 +	       FIRST_RESERVED_GDT_ENTRY * 8, 0,
    6.864 +	       NR_RESERVED_GDT_ENTRIES * 8);
   6.865 +
   6.866 +	memset(&ctxt, 0, sizeof(ctxt));
   6.867 +
   6.868 +	ctxt.cpu_ctxt.ds = __USER_DS;
   6.869 +	ctxt.cpu_ctxt.es = __USER_DS;
   6.870 +	ctxt.cpu_ctxt.fs = 0;
   6.871 +	ctxt.cpu_ctxt.gs = 0;
   6.872 +	ctxt.cpu_ctxt.ss = __KERNEL_DS;
   6.873 +	ctxt.cpu_ctxt.cs = __KERNEL_CS;
   6.874 +	ctxt.cpu_ctxt.eip = start_eip;
   6.875 +	ctxt.cpu_ctxt.esp = idle->thread.esp;
   6.876 +	ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
   6.877 +
   6.878 +	/* FPU is set up to default initial state. */
   6.879 +	memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
   6.880 +
   6.881 +	/* Virtual IDT is empty at start-of-day. */
   6.882 +	for ( i = 0; i < 256; i++ )
   6.883 +	{
   6.884 +		ctxt.trap_ctxt[i].vector = i;
   6.885 +		ctxt.trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
   6.886 +	}
   6.887 +	ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
   6.888 +
   6.889 +	/* No LDT. */
   6.890 +	ctxt.ldt_ents = 0;
   6.891 +
   6.892 +	{
   6.893 +		unsigned long va;
   6.894 +		int f;
   6.895 +
   6.896 +		for (va = cpu_gdt_descr[cpu].address, f = 0;
   6.897 +		     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
   6.898 +		     va += PAGE_SIZE, f++) {
   6.899 +			ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
   6.900 +			make_page_readonly((void *)va);
   6.901 +		}
   6.902 +		ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
   6.903 +		flush_page_update_queue();
   6.904 +	}
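          +
          +	/*
          +	 * The loop above hands the hypervisor the machine frame numbers
          +	 * of the new vcpu's GDT pages; Xen only accepts GDT frames that
          +	 * the guest maps read-only, hence make_page_readonly() on each
          +	 * page before the queued page-table updates are flushed.
          +	 */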
   6.905 +
   6.906 +	/* Ring 1 stack is the initial stack. */
   6.907 +	ctxt.guestos_ss  = __KERNEL_DS;
   6.908 +	ctxt.guestos_esp = idle->thread.esp;
   6.909 +
   6.910 +	/* Callback handlers. */
   6.911 +	ctxt.event_callback_cs     = __KERNEL_CS;
   6.912 +	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
   6.913 +	ctxt.failsafe_callback_cs  = __KERNEL_CS;
   6.914 +	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   6.915 +
   6.916 +	ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
   6.917 +
   6.918 +	boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
   6.919 +
   6.920 +	if (!boot_error) {
   6.921 +		/*
   6.922 +		 * allow APs to start initializing.
   6.923 +		 */
   6.924 +		Dprintk("Before Callout %d.\n", cpu);
   6.925 +		cpu_set(cpu, cpu_callout_map);
   6.926 +		Dprintk("After Callout %d.\n", cpu);
   6.927 +
   6.928 +		/*
   6.929 +		 * Wait 5s total for a response
   6.930 +		 */
   6.931 +		for (timeout = 0; timeout < 50000; timeout++) {
   6.932 +			if (cpu_isset(cpu, cpu_callin_map))
   6.933 +				break;	/* It has booted */
   6.934 +			udelay(100);
   6.935 +		}
   6.936 +
   6.937 +		if (cpu_isset(cpu, cpu_callin_map)) {
   6.938 +			/* number CPUs logically, starting from 1 (BSP is 0) */
   6.939 +			Dprintk("OK.\n");
   6.940 +			printk("CPU%d: ", cpu);
   6.941 +			print_cpu_info(&cpu_data[cpu]);
   6.942 +			Dprintk("CPU has booted.\n");
   6.943 +		} else {
    6.944 +			boot_error = 1;
   6.945 +		}
   6.946 +	}
   6.947 +	x86_cpu_to_apicid[cpu] = apicid;
   6.948 +	if (boot_error) {
   6.949 +		/* Try to put things back the way they were before ... */
   6.950 +		unmap_cpu_to_logical_apicid(cpu);
   6.951 +		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
   6.952 +		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
   6.953 +		cpucount--;
   6.954 +	}
   6.955 +
   6.956 +#else
   6.957 +	Dprintk("Setting warm reset code and vector.\n");
   6.958 +
   6.959 +	store_NMI_vector(&nmi_high, &nmi_low);
   6.960 +
   6.961 +	smpboot_setup_warm_reset_vector(start_eip);
   6.962 +
   6.963 +	/*
   6.964 +	 * Starting actual IPI sequence...
   6.965 +	 */
   6.966 +	boot_error = wakeup_secondary_cpu(apicid, start_eip);
   6.967 +
   6.968 +	if (!boot_error) {
   6.969 +		/*
   6.970 +		 * allow APs to start initializing.
   6.971 +		 */
   6.972 +		Dprintk("Before Callout %d.\n", cpu);
   6.973 +		cpu_set(cpu, cpu_callout_map);
   6.974 +		Dprintk("After Callout %d.\n", cpu);
   6.975 +
   6.976 +		/*
   6.977 +		 * Wait 5s total for a response
   6.978 +		 */
   6.979 +		for (timeout = 0; timeout < 50000; timeout++) {
   6.980 +			if (cpu_isset(cpu, cpu_callin_map))
   6.981 +				break;	/* It has booted */
   6.982 +			udelay(100);
   6.983 +		}
   6.984 +
   6.985 +		if (cpu_isset(cpu, cpu_callin_map)) {
   6.986 +			/* number CPUs logically, starting from 1 (BSP is 0) */
   6.987 +			Dprintk("OK.\n");
   6.988 +			printk("CPU%d: ", cpu);
   6.989 +			print_cpu_info(&cpu_data[cpu]);
   6.990 +			Dprintk("CPU has booted.\n");
   6.991 +		} else {
    6.992 +			boot_error = 1;
   6.993 +			if (*((volatile unsigned char *)trampoline_base)
   6.994 +					== 0xA5)
   6.995 +				/* trampoline started but...? */
   6.996 +				printk("Stuck ??\n");
   6.997 +			else
   6.998 +				/* trampoline code not run */
   6.999 +				printk("Not responding.\n");
  6.1000 +			inquire_remote_apic(apicid);
  6.1001 +		}
  6.1002 +	}
  6.1003 +	x86_cpu_to_apicid[cpu] = apicid;
  6.1004 +	if (boot_error) {
  6.1005 +		/* Try to put things back the way they were before ... */
  6.1006 +		unmap_cpu_to_logical_apicid(cpu);
  6.1007 +		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
  6.1008 +		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
  6.1009 +		cpucount--;
  6.1010 +	}
  6.1011 +
  6.1012 +	/* mark "stuck" area as not stuck */
  6.1013 +	*((volatile unsigned long *)trampoline_base) = 0;
  6.1014 +#endif
  6.1015 +
  6.1016 +	return boot_error;
  6.1017 +}
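          +
          +/*
          + * Of the two arms above, only the first (HYPERVISOR_boot_vcpu) is
          + * compiled in: under Xen a new vcpu is started from a register-level
          + * description (full_execution_context_t) rather than by the native
          + * INIT/STARTUP IPI and warm-reset-vector dance kept in the #else arm.
          + */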
  6.1018 +
  6.1019 +cycles_t cacheflush_time;
  6.1020 +unsigned long cache_decay_ticks;
  6.1021 +
  6.1022 +static void smp_tune_scheduling (void)
  6.1023 +{
  6.1024 +	unsigned long cachesize;       /* kB   */
  6.1025 +	unsigned long bandwidth = 350; /* MB/s */
  6.1026 +	/*
  6.1027 +	 * Rough estimation for SMP scheduling, this is the number of
  6.1028 +	 * cycles it takes for a fully memory-limited process to flush
  6.1029 +	 * the SMP-local cache.
  6.1030 +	 *
  6.1031 +	 * (For a P5 this pretty much means we will choose another idle
  6.1032 +	 *  CPU almost always at wakeup time (this is due to the small
  6.1033 +	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
  6.1034 +	 *  the cache size)
  6.1035 +	 */
  6.1036 +
  6.1037 +	if (!cpu_khz) {
  6.1038 +		/*
  6.1039 +		 * this basically disables processor-affinity
  6.1040 +		 * scheduling on SMP without a TSC.
  6.1041 +		 */
  6.1042 +		cacheflush_time = 0;
  6.1043 +		return;
  6.1044 +	} else {
  6.1045 +		cachesize = boot_cpu_data.x86_cache_size;
  6.1046 +		if (cachesize == -1) {
  6.1047 +			cachesize = 16; /* Pentiums, 2x8kB cache */
  6.1048 +			bandwidth = 100;
  6.1049 +		}
  6.1050 +
  6.1051 +		cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
  6.1052 +	}
  6.1053 +
  6.1054 +	cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
  6.1055 +
  6.1056 +	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
  6.1057 +		(long)cacheflush_time/(cpu_khz/1000),
  6.1058 +		((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
  6.1059 +	printk("task migration cache decay timeout: %ld msecs.\n",
  6.1060 +		cache_decay_ticks);
  6.1061 +}
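          +
          +/*
          + * A worked example with hypothetical numbers: for cpu_khz = 500000
          + * (a 500MHz CPU) and a 512kB cache at the default 350MB/s,
          + * cacheflush_time = (500000>>10) * (512<<10) / 350 ~= 731000 cycles
          + * (about 1.5ms), and cache_decay_ticks = 731000/500000 + 1 = 2.
          + */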
  6.1062 +
  6.1063 +/*
  6.1064 + * Cycle through the processors sending APIC IPIs to boot each.
  6.1065 + */
  6.1066 +
  6.1067 +#if 0
  6.1068 +static int boot_cpu_logical_apicid;
  6.1069 +#endif
  6.1070 +/* Where the IO area was mapped on multiquad, always 0 otherwise */
  6.1071 +void *xquad_portio;
  6.1072 +
  6.1073 +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
  6.1074 +
  6.1075 +static void __init smp_boot_cpus(unsigned int max_cpus)
  6.1076 +{
  6.1077 +	int cpu, kicked;
  6.1078 +	unsigned long bogosum = 0;
  6.1079 +#if 0
  6.1080 +	int apicid, bit;
  6.1081 +#endif
  6.1082 +
  6.1083 +	/*
  6.1084 +	 * Setup boot CPU information
  6.1085 +	 */
  6.1086 +	smp_store_cpu_info(0); /* Final full version of the data */
  6.1087 +	printk("CPU%d: ", 0);
  6.1088 +	print_cpu_info(&cpu_data[0]);
  6.1089 +
  6.1090 +#if 0
  6.1091 +	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
  6.1092 +	boot_cpu_logical_apicid = logical_smp_processor_id();
  6.1093 +	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
  6.1094 +#else
  6.1095 +	// boot_cpu_physical_apicid = 0;
  6.1096 +	// boot_cpu_logical_apicid = 0;
  6.1097 +	x86_cpu_to_apicid[0] = 0;
  6.1098 +#endif
  6.1099 +
  6.1100 +	current_thread_info()->cpu = 0;
  6.1101 +	smp_tune_scheduling();
  6.1102 +	cpus_clear(cpu_sibling_map[0]);
  6.1103 +	cpu_set(0, cpu_sibling_map[0]);
  6.1104 +
  6.1105 +	/*
  6.1106 +	 * If we couldn't find an SMP configuration at boot time,
  6.1107 +	 * get out of here now!
  6.1108 +	 */
  6.1109 +	if (!smp_found_config /* && !acpi_lapic) */) {
  6.1110 +		printk(KERN_NOTICE "SMP motherboard not detected.\n");
  6.1111 +		smpboot_clear_io_apic_irqs();
  6.1112 +#if 0
  6.1113 +		phys_cpu_present_map = physid_mask_of_physid(0);
  6.1114 +		if (APIC_init_uniprocessor())
  6.1115 +			printk(KERN_NOTICE "Local APIC not detected."
  6.1116 +					   " Using dummy APIC emulation.\n");
  6.1117 +#endif
  6.1118 +		map_cpu_to_logical_apicid();
  6.1119 +		return;
  6.1120 +	}
  6.1121 +
  6.1122 +#if 0
  6.1123 +	/*
  6.1124 +	 * Should not be necessary because the MP table should list the boot
  6.1125 +	 * CPU too, but we do it for the sake of robustness anyway.
  6.1126 +	 * Makes no sense to do this check in clustered apic mode, so skip it
  6.1127 +	 */
  6.1128 +	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
  6.1129 +		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
  6.1130 +				boot_cpu_physical_apicid);
  6.1131 +		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
  6.1132 +	}
  6.1133 +
  6.1134 +	/*
  6.1135 +	 * If we couldn't find a local APIC, then get out of here now!
  6.1136 +	 */
  6.1137 +	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
  6.1138 +		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
  6.1139 +			boot_cpu_physical_apicid);
  6.1140 +		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
  6.1141 +		smpboot_clear_io_apic_irqs();
  6.1142 +		phys_cpu_present_map = physid_mask_of_physid(0);
  6.1143 +		return;
  6.1144 +	}
  6.1145 +
  6.1146 +	verify_local_APIC();
  6.1147 +#endif
  6.1148 +
  6.1149 +	/*
  6.1150 +	 * If SMP should be disabled, then really disable it!
  6.1151 +	 */
  6.1152 +	if (!max_cpus) {
  6.1153 +		HYPERVISOR_shared_info->n_vcpu = 1;
  6.1154 +		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
  6.1155 +		smpboot_clear_io_apic_irqs();
  6.1156 +#if 0
  6.1157 +		phys_cpu_present_map = physid_mask_of_physid(0);
  6.1158 +#endif
  6.1159 +		return;
  6.1160 +	}
  6.1161 +
  6.1162 +	smp_intr_init();
  6.1163 +
  6.1164 +#if 0
  6.1165 +	connect_bsp_APIC();
  6.1166 +	setup_local_APIC();
  6.1167 +#endif
  6.1168 +	map_cpu_to_logical_apicid();
  6.1169 +#if 0
  6.1170 +
  6.1171 +
  6.1172 +	setup_portio_remap();
  6.1173 +
  6.1174 +	/*
  6.1175 +	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
  6.1176 +	 *
  6.1177 +	 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
  6.1178 +	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
  6.1179 +	 * clustered apic ID.
  6.1180 +	 */
  6.1181 +	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
  6.1182 +#endif
  6.1183 +	Dprintk("CPU present map: %lx\n",
  6.1184 +		(1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
  6.1185 +
  6.1186 +	kicked = 1;
  6.1187 +	for (cpu = 1; kicked < NR_CPUS &&
  6.1188 +		     cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
  6.1189 +		if (max_cpus <= cpucount+1)
  6.1190 +			continue;
  6.1191 +
  6.1192 +		if (do_boot_cpu(cpu))
  6.1193 +			printk("CPU #%d not responding - cannot use it.\n",
  6.1194 +								cpu);
  6.1195 +		else
  6.1196 +			++kicked;
  6.1197 +	}
  6.1198 +
  6.1199 +#if 0
  6.1200 +	/*
  6.1201 +	 * Cleanup possible dangling ends...
  6.1202 +	 */
  6.1203 +	smpboot_restore_warm_reset_vector();
  6.1204 +#endif
  6.1205 +
  6.1206 +	/*
  6.1207 +	 * Allow the user to impress friends.
  6.1208 +	 */
  6.1209 +	Dprintk("Before bogomips.\n");
  6.1210 +	for (cpu = 0; cpu < NR_CPUS; cpu++)
  6.1211 +		if (cpu_isset(cpu, cpu_callout_map))
  6.1212 +			bogosum += cpu_data[cpu].loops_per_jiffy;
  6.1213 +	printk(KERN_INFO
  6.1214 +		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
  6.1215 +		cpucount+1,
  6.1216 +		bogosum/(500000/HZ),
  6.1217 +		(bogosum/(5000/HZ))%100);
  6.1218 +	
  6.1219 +	Dprintk("Before bogocount - setting activated=1.\n");
  6.1220 +
  6.1221 +	if (smp_b_stepping)
  6.1222 +		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
  6.1223 +
  6.1224 +	/*
  6.1225 +	 * Don't taint if we are running SMP kernel on a single non-MP
  6.1226 +	 * approved Athlon
  6.1227 +	 */
  6.1228 +	if (tainted & TAINT_UNSAFE_SMP) {
  6.1229 +		if (cpucount)
  6.1230 +			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
  6.1231 +		else
  6.1232 +			tainted &= ~TAINT_UNSAFE_SMP;
  6.1233 +	}
  6.1234 +
  6.1235 +	Dprintk("Boot done.\n");
  6.1236 +
  6.1237 +	/*
  6.1238 +	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
  6.1239 +	 * efficiently.
  6.1240 +	 */
  6.1241 +	for (cpu = 0; cpu < NR_CPUS; cpu++)
  6.1242 +		cpus_clear(cpu_sibling_map[cpu]);
  6.1243 +
  6.1244 +	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  6.1245 +		int siblings = 0;
  6.1246 +		int i;
  6.1247 +		if (!cpu_isset(cpu, cpu_callout_map))
  6.1248 +			continue;
  6.1249 +
  6.1250 +		if (smp_num_siblings > 1) {
  6.1251 +			for (i = 0; i < NR_CPUS; i++) {
  6.1252 +				if (!cpu_isset(i, cpu_callout_map))
  6.1253 +					continue;
  6.1254 +				if (phys_proc_id[cpu] == phys_proc_id[i]) {
  6.1255 +					siblings++;
  6.1256 +					cpu_set(i, cpu_sibling_map[cpu]);
  6.1257 +				}
  6.1258 +			}
  6.1259 +		} else {
  6.1260 +			siblings++;
  6.1261 +			cpu_set(cpu, cpu_sibling_map[cpu]);
  6.1262 +		}
  6.1263 +
  6.1264 +		if (siblings != smp_num_siblings)
  6.1265 +			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
  6.1266 +	}
  6.1267 +
  6.1268 +#if 0
  6.1269 +	if (nmi_watchdog == NMI_LOCAL_APIC)
  6.1270 +		check_nmi_watchdog();
  6.1271 +
  6.1272 +	smpboot_setup_io_apic();
  6.1273 +
  6.1274 +	setup_boot_APIC_clock();
  6.1275 +
  6.1276 +	/*
  6.1277 +	 * Synchronize the TSC with the AP
  6.1278 +	 */
  6.1279 +	if (cpu_has_tsc && cpucount && cpu_khz)
  6.1280 +		synchronize_tsc_bp();
  6.1281 +#endif
  6.1282 +}
  6.1283 +
  6.1284 +/* These are wrappers to interface to the new boot process.  Someone
  6.1285 +   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
  6.1286 +void __init smp_prepare_cpus(unsigned int max_cpus)
  6.1287 +{
  6.1288 +	smp_boot_cpus(max_cpus);
  6.1289 +}
  6.1290 +
  6.1291 +void __devinit smp_prepare_boot_cpu(void)
  6.1292 +{
  6.1293 +	cpu_set(smp_processor_id(), cpu_online_map);
  6.1294 +	cpu_set(smp_processor_id(), cpu_callout_map);
  6.1295 +}
  6.1296 +
  6.1297 +int __devinit __cpu_up(unsigned int cpu)
  6.1298 +{
  6.1299 +	/* This only works at boot for x86.  See "rewrite" above. */
  6.1300 +	if (cpu_isset(cpu, smp_commenced_mask)) {
  6.1301 +		local_irq_enable();
  6.1302 +		return -ENOSYS;
  6.1303 +	}
  6.1304 +
  6.1305 +	/* In case one didn't come up */
  6.1306 +	if (!cpu_isset(cpu, cpu_callin_map)) {
  6.1307 +		local_irq_enable();
  6.1308 +		return -EIO;
  6.1309 +	}
  6.1310 +
  6.1311 +	local_irq_enable();
  6.1312 +	/* Unleash the CPU! */
  6.1313 +	cpu_set(cpu, smp_commenced_mask);
  6.1314 +	while (!cpu_isset(cpu, cpu_online_map))
  6.1315 +		mb();
  6.1316 +	return 0;
  6.1317 +}
  6.1318 +
  6.1319 +void __init smp_cpus_done(unsigned int max_cpus)
  6.1320 +{
  6.1321 +#if 1
  6.1322 +#else
  6.1323 +#ifdef CONFIG_X86_IO_APIC
  6.1324 +	setup_ioapic_dest();
  6.1325 +#endif
  6.1326 +	zap_low_mappings();
  6.1327 +	/*
  6.1328 +	 * Disable executability of the SMP trampoline:
  6.1329 +	 */
  6.1330 +	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
  6.1331 +#endif
  6.1332 +}
  6.1333 +
  6.1334 +extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
  6.1335 +
  6.1336 +static struct irqaction reschedule_irq = {
  6.1337 +	smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
  6.1338 +	NULL, NULL
  6.1339 +};
  6.1340 +
  6.1341 +extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
  6.1342 +
  6.1343 +static struct irqaction invalidate_irq = {
  6.1344 +	smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
  6.1345 +	NULL, NULL
  6.1346 +};
  6.1347 +
  6.1348 +extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
  6.1349 +
  6.1350 +static struct irqaction call_function_irq = {
  6.1351 +	smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
  6.1352 +	"call_function", NULL, NULL
  6.1353 +};
  6.1354 +
  6.1355 +void __init smp_intr_init(void)
  6.1356 +{
  6.1357 +
  6.1358 +	(void)setup_irq(
  6.1359 +	    bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
  6.1360 +	    &reschedule_irq);
  6.1361 +	(void)setup_irq(
  6.1362 +	    bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
  6.1363 +	    &invalidate_irq);
  6.1364 +	(void)setup_irq(
  6.1365 +	    bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
  6.1366 +	    &call_function_irq);
  6.1367 +}
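          +
          +/*
          + * There is no programmable local APIC under Xen, so the reschedule,
          + * TLB-invalidate and call-function IPIs above are delivered as
          + * event-channel interrupts: bind_ipi_on_cpu_to_irq() maps each
          + * per-CPU IPI "vector" onto a dynamic IRQ, which setup_irq() then
          + * wires to the corresponding handler.
          + */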
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/kernel/smp.c	Mon Dec 27 13:35:05 2004 +0000
     7.3 @@ -0,0 +1,19 @@
     7.4 +/* Copyright (C) 2004, Christian Limpach */
     7.5 +
     7.6 +#include <linux/init.h>
     7.7 +#include <linux/kernel.h>
     7.8 +#include <linux/threads.h>
     7.9 +
    7.10 +unsigned int __initdata maxcpus = NR_CPUS;
    7.11 +
    7.12 +
    7.13 +/*
    7.14 + * the frequency of the profiling timer can be changed
    7.15 + * by writing a multiplier value into /proc/profile.
    7.16 + */
    7.17 +int setup_profiling_timer(unsigned int multiplier)
    7.18 +{
    7.19 +	printk("setup_profiling_timer\n");
    7.20 +
    7.21 +	return 0;
    7.22 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/Makefile	Mon Dec 27 13:35:05 2004 +0000
     8.3 @@ -0,0 +1,3 @@
     8.4 +
     8.5 +obj-y	:= blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
     8.6 +
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c	Mon Dec 27 13:35:05 2004 +0000
     9.3 @@ -0,0 +1,86 @@
     9.4 +/******************************************************************************
     9.5 + * blktap.c
     9.6 + * 
     9.7 + * XenLinux virtual block-device tap.
     9.8 + * 
     9.9 + * Copyright (c) 2004, Andrew Warfield
    9.10 + *
    9.11 + * Based on the original split block driver:
    9.12 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
    9.13 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
    9.14 + * Copyright (c) 2004, Christian Limpach
    9.15 + * 
    9.16 + * Note that unlike the split block driver code, this driver has been developed
    9.17 + * strictly for Linux 2.6
     9.18 + * strictly for Linux 2.6.
    9.19 +
    9.20 +#include "blktap.h"
    9.21 +
    9.22 +int __init xlblk_init(void)
    9.23 +{
    9.24 +    ctrl_msg_t               cmsg;
    9.25 +    blkif_fe_driver_status_t fe_st;
    9.26 +    blkif_be_driver_status_t be_st;
    9.27 +
    9.28 +    printk(KERN_INFO "Initialising Xen block tap device\n");
    9.29 +
    9.30 +    DPRINTK("   tap - Backend connection init:\n");
    9.31 +
    9.32 +
    9.33 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
    9.34 +                                    CALLBACK_IN_BLOCKING_CONTEXT);
    9.35 +
    9.36 +    /* Send a driver-UP notification to the domain controller. */
    9.37 +    cmsg.type      = CMSG_BLKIF_FE;
    9.38 +    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
    9.39 +    cmsg.length    = sizeof(blkif_fe_driver_status_t);
    9.40 +    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
    9.41 +    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
    9.42 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
    9.43 +
    9.44 +    DPRINTK("   tap - Frontend connection init:\n");
    9.45 +    
    9.46 +    active_reqs_init();
    9.47 +    
    9.48 +    ptfe_blkif.status = DISCONNECTED;
    9.49 +
    9.50 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
    9.51 +                                    CALLBACK_IN_BLOCKING_CONTEXT);
    9.52 +
    9.53 +    /* Send a driver-UP notification to the domain controller. */
    9.54 +    cmsg.type      = CMSG_BLKIF_BE;
    9.55 +    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
    9.56 +    cmsg.length    = sizeof(blkif_be_driver_status_t);
    9.57 +    be_st.status   = BLKIF_DRIVER_STATUS_UP;
    9.58 +    memcpy(cmsg.msg, &be_st, sizeof(be_st));
    9.59 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
    9.60 +
    9.61 +    DPRINTK("   tap - Userland channel init:\n");
    9.62 +
    9.63 +    blktap_init();
    9.64 +
    9.65 +    DPRINTK("Blkif tap device initialized.\n");
    9.66 +
    9.67 +    return 0;
    9.68 +}
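          +
          +/*
          + * Note that xlblk_init() registers on both control-message channels:
          + * the tap sits between the two drivers, so it announces itself as a
          + * frontend (CMSG_BLKIF_FE driver-UP) and as a backend (CMSG_BLKIF_BE
          + * driver-UP) before opening the userland channel via blktap_init().
          + */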
    9.69 +
    9.70 +void blkdev_suspend(void)
    9.71 +{
    9.72 +}
    9.73 +
    9.74 +void blkdev_resume(void)
    9.75 +{
    9.76 +    ctrl_msg_t               cmsg;
    9.77 +    blkif_fe_driver_status_t st;    
    9.78 +
    9.79 +    /* Send a driver-UP notification to the domain controller. */
    9.80 +    cmsg.type      = CMSG_BLKIF_FE;
    9.81 +    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
    9.82 +    cmsg.length    = sizeof(blkif_fe_driver_status_t);
    9.83 +    st.status      = BLKIF_DRIVER_STATUS_UP;
    9.84 +    memcpy(cmsg.msg, &st, sizeof(st));
    9.85 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
    9.86 +}
    9.87 +
    9.88 +
    9.89 +__initcall(xlblk_init);
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h	Mon Dec 27 13:35:05 2004 +0000
    10.3 @@ -0,0 +1,254 @@
    10.4 +/*
    10.5 + * blktap.h
    10.6 + * 
    10.7 + * Interfaces for the Xen block tap driver.
    10.8 + * 
    10.9 + * (c) 2004, Andrew Warfield, University of Cambridge
   10.10 + * 
   10.11 + */
   10.12 +
   10.13 +#ifndef __BLKTAP_H__
   10.14 +#define __BLKTAP_H__
   10.15 +
   10.16 +#include <linux/version.h>
   10.17 +#include <linux/blkdev.h>
   10.18 +#include <linux/config.h>
   10.19 +#include <linux/sched.h>
   10.20 +#include <linux/interrupt.h>
   10.21 +#include <asm-xen/ctrl_if.h>
   10.22 +#include <linux/slab.h>
    10.23 +
   10.24 +#include <asm/io.h>
   10.25 +#include <asm/setup.h>
   10.26 +#include <asm/pgalloc.h>
   10.27 +#include <asm-xen/hypervisor.h>
   10.28 +#include <asm-xen/xen-public/io/blkif.h>
   10.29 +
   10.30 +/* -------[ debug / pretty printing ]--------------------------------- */
   10.31 +
   10.32 +#if 0
   10.33 +#define ASSERT(_p) \
   10.34 +    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
   10.35 +    __LINE__, __FILE__); *(int*)0=0; }
   10.36 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
   10.37 +                           __FILE__ , __LINE__ , ## _a )
   10.38 +#else
   10.39 +#define ASSERT(_p) ((void)0)
   10.40 +#define DPRINTK(_f, _a...) ((void)0)
   10.41 +#endif
   10.42 +
   10.43 +#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
   10.44 +
   10.45 +/* -------[ connection / request tracking ]--------------------------- */
   10.46 +
   10.47 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   10.48 +#define VMALLOC_VMADDR(x) ((unsigned long)(x))
   10.49 +#endif
   10.50 +
   10.51 +extern spinlock_t blkif_io_lock;
   10.52 +
   10.53 +typedef struct blkif_st {
   10.54 +    /* Unique identifier for this interface. */
   10.55 +    domid_t          domid;
   10.56 +    unsigned int     handle;
   10.57 +    /* Physical parameters of the comms window. */
   10.58 +    unsigned long    shmem_frame;
   10.59 +    unsigned int     evtchn;
   10.60 +    int              irq;
   10.61 +    /* Comms information. */
   10.62 +    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
   10.63 +    BLKIF_RING_IDX     blk_req_cons;  /* Request consumer. */
   10.64 +    BLKIF_RING_IDX     blk_resp_prod; /* Private version of resp. producer. */
   10.65 +    
   10.66 +    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
   10.67 +    /*
   10.68 +     * DISCONNECT response is deferred until pending requests are ack'ed.
   10.69 +     * We therefore need to store the id from the original request.
    10.70 +     */
          +    u8               disconnect_rspid;
   10.71 +    struct blkif_st *hash_next;
   10.72 +    struct list_head blkdev_list;
   10.73 +    spinlock_t       blk_ring_lock;
   10.74 +    atomic_t         refcnt;
   10.75 +    
   10.76 +    struct work_struct work;
   10.77 +} blkif_t;
   10.78 +
   10.79 +typedef struct {
   10.80 +    blkif_t       *blkif;
   10.81 +    unsigned long  id;
   10.82 +    int            nr_pages;
   10.83 +    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   10.84 +    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   10.85 +    int            next_free;
   10.86 +} active_req_t;
   10.87 +
   10.88 +
   10.89 +/* -------[ block ring structs ]-------------------------------------- */
   10.90 +
   10.91 +/* Types of ring. */
   10.92 +#define BLKIF_REQ_RING_TYPE 1
   10.93 +#define BLKIF_RSP_RING_TYPE 2
   10.94 +
   10.95 +/* generic ring struct. */
   10.96 +typedef struct blkif_generic_ring_struct {
   10.97 +    int type;
   10.98 +} blkif_generic_ring_t;
   10.99 +
  10.100 +/* A requestor's view of a ring. */
  10.101 +typedef struct blkif_req_ring_struct {
  10.102 +
  10.103 +    int type;                    /* Will be BLKIF_REQ_RING_TYPE        */
  10.104 +    BLKIF_RING_IDX req_prod;     /* PRIVATE req_prod index             */
  10.105 +    BLKIF_RING_IDX rsp_cons;     /* Response consumer index            */
  10.106 +    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
  10.107 +
  10.108 +} blkif_req_ring_t;
  10.109 +
  10.110 +#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
  10.111 +
  10.112 +/* A responder's view of a ring. */
  10.113 +typedef struct blkif_rsp_ring_struct {
  10.114 +
   10.115 +    int type;                    /* Will be BLKIF_RSP_RING_TYPE        */
  10.116 +    BLKIF_RING_IDX rsp_prod;     /* PRIVATE rsp_prod index             */
  10.117 +    BLKIF_RING_IDX req_cons;     /* Request consumer index             */
  10.118 +    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
  10.119 +
  10.120 +} blkif_rsp_ring_t;
  10.121 +
   10.122 +#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
  10.123 +
  10.124 +#define RING(a) (blkif_generic_ring_t *)(a)
  10.125 +
  10.126 +inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
  10.127 +
  10.128 +
  10.129 +/* -------[ interposition -> character device interface ]------------- */
  10.130 +
   10.131 +/* /dev/xen/blktap resides at device number major=10, minor=202        */
  10.132 +#define BLKTAP_MINOR 202
  10.133 +
  10.134 +/* size of the extra VMA area to map in attached pages. */
  10.135 +#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
  10.136 +
  10.137 +/* blktap IOCTLs:                                                      */
  10.138 +#define BLKTAP_IOCTL_KICK_FE         1
  10.139 +#define BLKTAP_IOCTL_KICK_BE         2
  10.140 +#define BLKTAP_IOCTL_SETMODE         3
  10.141 +
  10.142 +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
  10.143 +#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
  10.144 +#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
  10.145 +#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
  10.146 +#define BLKTAP_MODE_COPY_FE          0x00000004
  10.147 +#define BLKTAP_MODE_COPY_BE          0x00000008
  10.148 +#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
  10.149 +#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
  10.150 +
  10.151 +#define BLKTAP_MODE_INTERPOSE \
  10.152 +           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
  10.153 +
  10.154 +#define BLKTAP_MODE_COPY_BOTH \
  10.155 +           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
  10.156 +
  10.157 +#define BLKTAP_MODE_COPY_BOTH_PAGES \
  10.158 +           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
  10.159 +
  10.160 +static inline int BLKTAP_MODE_VALID(unsigned long arg)
  10.161 +{
  10.162 +    return (
  10.163 +        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
  10.164 +        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
  10.165 +        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
  10.166 +        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
  10.167 +        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
  10.168 +        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
  10.169 +        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
  10.170 +        );
  10.171 +}
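          +
          +/*
          + * For example, by the check above BLKTAP_MODE_COPY_FE and
          + * (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_FE_PAGES) are both valid,
          + * but BLKTAP_MODE_COPY_FE_PAGES alone is rejected: copying pages is
          + * only meaningful when the matching COPY mode is also set.
          + */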
  10.172 +
  10.173 +
  10.174 +
  10.175 +/* -------[ Mappings to User VMA ]------------------------------------ */
  10.176 +#define MAX_PENDING_REQS 64
  10.177 +#define BATCH_PER_DOMAIN 16
  10.178 +extern struct vm_area_struct *blktap_vma;
  10.179 +
  10.180 +/* The following are from blkback.c and should probably be put in a
  10.181 + * header and included from there.
   10.182 + * The mmap area described here is where attached data pages will be mapped.
  10.183 + */
  10.184 + 
  10.185 +extern unsigned long mmap_vstart;
  10.186 +#define MMAP_PAGES_PER_REQUEST \
  10.187 +    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
  10.188 +#define MMAP_PAGES             \
  10.189 +    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
  10.190 +#define MMAP_VADDR(_req,_seg)                        \
  10.191 +    (mmap_vstart +                                   \
  10.192 +     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
  10.193 +     ((_seg) * PAGE_SIZE))
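          +
          +/* A worked example (hypothetical sizes): if
          + * BLKIF_MAX_SEGMENTS_PER_REQUEST were 11, each request would own
          + * 12 pages and MMAP_VADDR(2, 3) would resolve to
          + * mmap_vstart + (2*12 + 3)*PAGE_SIZE, i.e. page 27 of the area.
          + */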
  10.194 +
  10.195 +/* immediately before the mmap area, we have a bunch of pages reserved
  10.196 + * for shared memory rings.
  10.197 + */
  10.198 +
  10.199 +#define RING_PAGES 128 
  10.200 +extern unsigned long rings_vstart;
  10.201 +
  10.202 +/* -------[ Here be globals ]----------------------------------------- */
  10.203 +
  10.204 +extern unsigned long blktap_mode;
  10.205 +
  10.206 +
  10.207 +/* blkif struct, containing ring to FE domain */
  10.208 +extern blkif_t ptfe_blkif; 
  10.209 +
  10.210 +/* Connection to a single backend domain. */
  10.211 +extern blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
  10.212 +extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
  10.213 +extern BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
  10.214 +
  10.215 +/* Rings up to user space. */ 
  10.216 +extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
  10.217 +extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
  10.218 +
  10.219 +/* Event channel to backend domain. */
  10.220 +extern unsigned int blkif_ptbe_evtchn;
  10.221 +
  10.222 +/* User ring status... this will soon vanish into a ring struct. */
  10.223 +extern unsigned long blktap_ring_ok;
  10.224 +
  10.225 +/* -------[ ...and function prototypes. ]----------------------------- */
  10.226 +
  10.227 +/* init function for character device interface.                       */
  10.228 +int blktap_init(void);
  10.229 +
  10.230 +/* interfaces to the char driver, passing messages to and from apps.   */
  10.231 +void blktap_kick_user(void);
  10.232 +int blktap_write_to_ring(blkif_request_t *req);
  10.233 +
  10.234 +
  10.235 +/* user ring access functions: */
  10.236 +int blktap_write_fe_ring(blkif_request_t *req);
  10.237 +int blktap_write_be_ring(blkif_response_t *rsp);
  10.238 +int blktap_read_fe_ring(void);
  10.239 +int blktap_read_be_ring(void);
  10.240 +
  10.241 +/* and the helpers they call: */
  10.242 +inline int write_resp_to_fe_ring(blkif_response_t *rsp);
  10.243 +inline void kick_fe_domain(void);
  10.244 +
  10.245 +inline int write_req_to_be_ring(blkif_request_t *req);
  10.246 +inline void kick_be_domain(void);
  10.247 +
  10.248 +/* Interrupt handlers. */
  10.249 +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
  10.250 +                                  struct pt_regs *ptregs);
  10.251 +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
  10.252 +
  10.253 +/* Control message receiver. */
  10.254 +extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
  10.255 +
   10.256 +
   10.257 +#endif /* __BLKTAP_H__ */
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Mon Dec 27 13:35:05 2004 +0000
    11.3 @@ -0,0 +1,358 @@
    11.4 +/******************************************************************************
    11.5 + * blktap_controlmsg.c
    11.6 + * 
    11.7 + * XenLinux virtual block-device tap.
    11.8 + * Control interfaces to the frontend and backend drivers.
    11.9 + * 
   11.10 + * Copyright (c) 2004, Andrew Warfield
   11.11 + *
   11.12 + */
   11.13 + 
   11.14 +#include "blktap.h"
   11.15 +
   11.16 +#define BLKIF_STATE_CLOSED       0
   11.17 +#define BLKIF_STATE_DISCONNECTED 1
   11.18 +#define BLKIF_STATE_CONNECTED    2
   11.19 +
   11.20 +static char *blkif_state_name[] = {
   11.21 +    [BLKIF_STATE_CLOSED]       = "closed",
   11.22 +    [BLKIF_STATE_DISCONNECTED] = "disconnected",
   11.23 +    [BLKIF_STATE_CONNECTED]    = "connected",
   11.24 +};
   11.25 +
    11.26 +static char *blkif_status_name[] = {
   11.27 +    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
   11.28 +    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
   11.29 +    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
   11.30 +    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
   11.31 +};
   11.32 +static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED;
   11.33 +static unsigned blkif_ptbe_irq;
   11.34 +unsigned int blkif_ptbe_evtchn;
   11.35 +
   11.36 +/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
   11.37 +
   11.38 +
   11.39 +void blkif_ptfe_create(blkif_be_create_t *create)
   11.40 +{
   11.41 +    blkif_t      *blkif;
   11.42 +    domid_t       domid  = create->domid;
   11.43 +    unsigned int  handle = create->blkif_handle;
   11.44 +
   11.45 +
   11.46 +    /* May want to store info on the connecting domain here. */
   11.47 +
   11.48 +    DPRINTK("PT got BE_CREATE\n");
   11.49 +    blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */
   11.50 +
   11.51 +    /* blkif struct init code from blkback.c */
   11.52 +    memset(blkif, 0, sizeof(*blkif));
   11.53 +    blkif->domid  = domid;
   11.54 +    blkif->handle = handle;
   11.55 +    blkif->status = DISCONNECTED;    
   11.56 +    spin_lock_init(&blkif->blk_ring_lock);
   11.57 +    atomic_set(&blkif->refcnt, 0);
   11.58 +
   11.59 +    create->status = BLKIF_BE_STATUS_OKAY;
   11.60 +}
   11.61 +
   11.62 +
   11.63 +void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
   11.64 +{
   11.65 +    /* Clear anything that we initialized above. */
   11.66 +
   11.67 +    DPRINTK("PT got BE_DESTROY\n");
   11.68 +    destroy->status = BLKIF_BE_STATUS_OKAY;
   11.69 +}
   11.70 +
   11.71 +void blkif_ptfe_connect(blkif_be_connect_t *connect)
   11.72 +{
   11.73 +    domid_t       domid  = connect->domid;
   11.74 +    /*unsigned int  handle = connect->blkif_handle;*/
   11.75 +    unsigned int  evtchn = connect->evtchn;
   11.76 +    unsigned long shmem_frame = connect->shmem_frame;
   11.77 +    struct vm_struct *vma;
   11.78 +    pgprot_t      prot;
   11.79 +    int           error;
   11.80 +    blkif_t      *blkif;
   11.81 +
   11.82 +    DPRINTK("PT got BE_CONNECT\n");
   11.83 +
   11.84 +    blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */
   11.85 +
   11.86 +    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
   11.87 +    {
   11.88 +        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   11.89 +        return;
   11.90 +    }
   11.91 +
   11.92 +    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
   11.93 +    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
   11.94 +                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
   11.95 +                                    prot, domid);
   11.96 +    if ( error != 0 )
   11.97 +    {
   11.98 +        WPRINTK("BE_CONNECT: error! (%d)\n", error);
   11.99 +        if ( error == -ENOMEM ) 
  11.100 +            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  11.101 +        else if ( error == -EFAULT ) {
  11.102 +            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
  11.103 +            WPRINTK("BE_CONNECT: MAPPING error!\n");
  11.104 +        }
  11.105 +        else
  11.106 +            connect->status = BLKIF_BE_STATUS_ERROR;
  11.107 +        vfree(vma->addr);
  11.108 +        return;
  11.109 +    }
  11.110 +
  11.111 +    if ( blkif->status != DISCONNECTED )
  11.112 +    {
  11.113 +        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  11.114 +        vfree(vma->addr);
  11.115 +        return;
  11.116 +    }
  11.117 +
  11.118 +    blkif->evtchn        = evtchn;
  11.119 +    blkif->irq           = bind_evtchn_to_irq(evtchn);
  11.120 +    blkif->shmem_frame   = shmem_frame;
  11.121 +    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
  11.122 +    blkif->status        = CONNECTED;
  11.123 +    /*blkif_get(blkif);*/
  11.124 +
  11.125 +    request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
  11.126 +
  11.127 +    connect->status = BLKIF_BE_STATUS_OKAY;
  11.128 +}
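          +
          +/*
          + * The connect path above maps the frontend's shared-ring frame into
          + * this domain's address space: direct_remap_area_pages() installs a
          + * foreign mapping of shmem_frame into the vm area, and
          + * bind_evtchn_to_irq() turns the inter-domain event channel into an
          + * ordinary Linux IRQ for blkif_ptfe_int().
          + */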
  11.129 +
  11.130 +void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect)
  11.131 +{
  11.132 +    /*
   11.133 +     * Don't actually set the passthrough to disconnected:
   11.134 +     * we just act as a pipe, and defer to the real ends to handle
   11.135 +     * things like recovery.
  11.136 +     */
  11.137 +
  11.138 +    DPRINTK("PT got BE_DISCONNECT\n");
  11.139 +
  11.140 +    disconnect->status = BLKIF_BE_STATUS_OKAY;
  11.141 +    return;
  11.142 +}
  11.143 +
  11.144 +/*-----[ Control Messages to/from Backend VM ]----------------------------*/
  11.145 +
  11.146 +/* Tell the controller to bring up the interface. */
  11.147 +static void blkif_ptbe_send_interface_connect(void)
  11.148 +{
  11.149 +    ctrl_msg_t cmsg = {
  11.150 +        .type    = CMSG_BLKIF_FE,
  11.151 +        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
  11.152 +        .length  = sizeof(blkif_fe_interface_connect_t),
  11.153 +    };
  11.154 +    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
  11.155 +    msg->handle      = 0;
  11.156 +    msg->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT;
  11.157 +    
  11.158 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  11.159 +}
  11.160 +
  11.161 +static void blkif_ptbe_close(void)
  11.162 +{
  11.163 +}
  11.164 +
  11.165 +/* Move from CLOSED to DISCONNECTED state. */
  11.166 +static void blkif_ptbe_disconnect(void)
  11.167 +{
  11.168 +    blk_ptbe_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  11.169 +    blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod 
  11.170 +                            = ptbe_resp_cons = ptbe_req_prod = 0;
  11.171 +    blkif_pt_state  = BLKIF_STATE_DISCONNECTED;
  11.172 +    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
  11.173 +    blkif_ptbe_send_interface_connect();
  11.174 +}
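          +
          +/*
          + * "Disconnect" above is really the CLOSED -> DISCONNECTED step of the
          + * frontend state machine: allocate the shared ring page, zero all
          + * producer/consumer indices, then offer the ring's machine frame to
          + * the backend via the INTERFACE_CONNECT control message.
          + */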
  11.175 +
  11.176 +static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
  11.177 +{
  11.178 +    int err = 0;
  11.179 +    
  11.180 +    blkif_ptbe_evtchn = status->evtchn;
  11.181 +    blkif_ptbe_irq    = bind_evtchn_to_irq(blkif_ptbe_evtchn);
  11.182 +
  11.183 +    err = request_irq(blkif_ptbe_irq, blkif_ptbe_int, 
  11.184 +                      SA_SAMPLE_RANDOM, "blkif", NULL);
  11.185 +    if ( err ) {
   11.186 +        WPRINTK("blktap request_irq failed (%d)\n", err);
  11.187 +        return;
  11.188 +    } else {
   11.189 +        /* transition to connected in case we need to do a
   11.190 +           partition probe on a whole disk */
  11.191 +        blkif_pt_state = BLKIF_STATE_CONNECTED;
  11.192 +    }
  11.193 +}
  11.194 +
  11.195 +static void unexpected(blkif_fe_interface_status_t *status)
  11.196 +{
  11.197 +    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 
  11.198 +           blkif_status_name[status->status],
  11.199 +           blkif_state_name[blkif_pt_state]);
  11.200 +}
  11.201 +
  11.202 +static void blkif_ptbe_status(
  11.203 +    blkif_fe_interface_status_t *status)
  11.204 +{
  11.205 +    if ( status->handle != 0 )
  11.206 +    {
  11.207 +        DPRINTK("Status change on unsupported blkif %d\n",
  11.208 +               status->handle);
  11.209 +        return;
  11.210 +    }
  11.211 +
  11.212 +    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
  11.213 +    
  11.214 +    switch ( status->status )
  11.215 +    {
  11.216 +    case BLKIF_INTERFACE_STATUS_CLOSED:
  11.217 +        switch ( blkif_pt_state )
  11.218 +        {
  11.219 +        case BLKIF_STATE_CLOSED:
  11.220 +            unexpected(status);
  11.221 +            break;
  11.222 +        case BLKIF_STATE_DISCONNECTED:
  11.223 +        case BLKIF_STATE_CONNECTED:
  11.224 +            unexpected(status);
  11.225 +            blkif_ptbe_close();
  11.226 +            break;
  11.227 +        }
  11.228 +        break;
  11.229 +        
  11.230 +    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
  11.231 +        switch ( blkif_pt_state )
  11.232 +        {
  11.233 +        case BLKIF_STATE_CLOSED:
  11.234 +            blkif_ptbe_disconnect();
  11.235 +            break;
  11.236 +        case BLKIF_STATE_DISCONNECTED:
  11.237 +        case BLKIF_STATE_CONNECTED:
  11.238 +            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
  11.239 +            unexpected(status);
  11.240 +            break;
  11.241 +        }
  11.242 +        break;
  11.243 +        
  11.244 +    case BLKIF_INTERFACE_STATUS_CONNECTED:
  11.245 +        switch ( blkif_pt_state )
  11.246 +        {
  11.247 +        case BLKIF_STATE_CLOSED:
  11.248 +            unexpected(status);
  11.249 +            blkif_ptbe_disconnect();
  11.250 +            blkif_ptbe_connect(status);
  11.251 +            break;
  11.252 +        case BLKIF_STATE_DISCONNECTED:
  11.253 +            blkif_ptbe_connect(status);
  11.254 +            break;
  11.255 +        case BLKIF_STATE_CONNECTED:
  11.256 +            unexpected(status);
  11.257 +            blkif_ptbe_connect(status);
  11.258 +            break;
  11.259 +        }
  11.260 +        break;
  11.261 +
   11.262 +    case BLKIF_INTERFACE_STATUS_CHANGED:
  11.263 +        switch ( blkif_pt_state )
  11.264 +        {
  11.265 +        case BLKIF_STATE_CLOSED:
  11.266 +        case BLKIF_STATE_DISCONNECTED:
  11.267 +            unexpected(status);
  11.268 +            break;
  11.269 +        case BLKIF_STATE_CONNECTED:
  11.270 +            /* vbd_update(); */
  11.271 +            /* tap doesn't really get state changes... */
  11.272 +            unexpected(status);
  11.273 +            break;
  11.274 +        }
   11.275 +        break;
   11.276 +
  11.277 +    default:
  11.278 +        DPRINTK("Status change to unknown value %d\n", status->status);
  11.279 +        break;
  11.280 +    }
  11.281 +}
  11.282 +
  11.283 +/*-----[ All control messages enter here: ]-------------------------------*/
  11.284 +
  11.285 +void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  11.286 +{
  11.287 +    switch ( msg->type )
  11.288 +    {
  11.289 +    case CMSG_BLKIF_FE:
  11.290 +
  11.291 +        switch ( msg->subtype )
  11.292 +        {
  11.293 +        case CMSG_BLKIF_FE_INTERFACE_STATUS:
  11.294 +            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
  11.295 +                goto parse_error;
  11.296 +            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
  11.297 +            break;        
  11.298 +
  11.299 +        default:
  11.300 +            goto parse_error;
   11.301 +        }
   11.301 +        break; /* don't fall through into the CMSG_BLKIF_BE cases */
   11.302 +
   11.303 +    case CMSG_BLKIF_BE:
  11.304 +        
  11.305 +        switch ( msg->subtype )
  11.306 +        {
  11.307 +        case CMSG_BLKIF_BE_CREATE:
  11.308 +            if ( msg->length != sizeof(blkif_be_create_t) )
  11.309 +                goto parse_error;
  11.310 +            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
  11.311 +            break; 
  11.312 +        case CMSG_BLKIF_BE_DESTROY:
  11.313 +            if ( msg->length != sizeof(blkif_be_destroy_t) )
  11.314 +                goto parse_error;
  11.315 +            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
  11.316 +            break;        
  11.317 +        case CMSG_BLKIF_BE_CONNECT:
  11.318 +            if ( msg->length != sizeof(blkif_be_connect_t) )
  11.319 +                goto parse_error;
  11.320 +            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
  11.321 +            break;        
  11.322 +        case CMSG_BLKIF_BE_DISCONNECT:
  11.323 +            if ( msg->length != sizeof(blkif_be_disconnect_t) )
  11.324 +                goto parse_error;
  11.325 +            blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]);
  11.326 +            break;        
  11.327 +
  11.328 +        /* We just ignore anything to do with vbds for now. */
  11.329 +        
  11.330 +        case CMSG_BLKIF_BE_VBD_CREATE:
  11.331 +            DPRINTK("PT got VBD_CREATE\n");
  11.332 +            ((blkif_be_vbd_create_t *)&msg->msg[0])->status 
  11.333 +                = BLKIF_BE_STATUS_OKAY;
  11.334 +            break;
  11.335 +        case CMSG_BLKIF_BE_VBD_DESTROY:
  11.336 +            DPRINTK("PT got VBD_DESTROY\n");
  11.337 +            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
  11.338 +                = BLKIF_BE_STATUS_OKAY;
  11.339 +            break;
  11.340 +        case CMSG_BLKIF_BE_VBD_GROW:
  11.341 +            DPRINTK("PT got VBD_GROW\n");
  11.342 +            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
  11.343 +                = BLKIF_BE_STATUS_OKAY;
  11.344 +            break;
  11.345 +        case CMSG_BLKIF_BE_VBD_SHRINK:
  11.346 +            DPRINTK("PT got VBD_SHRINK\n");
  11.347 +            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
  11.348 +                = BLKIF_BE_STATUS_OKAY;
  11.349 +            break;
  11.350 +        default:
  11.351 +            goto parse_error;
  11.352 +        }
  11.353 +    }
  11.354 +
  11.355 +    ctrl_if_send_response(msg);
  11.356 +    return;
  11.357 +
  11.358 + parse_error:
  11.359 +    msg->length = 0;
  11.360 +    ctrl_if_send_response(msg);
  11.361 +}
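
A note on the dispatch above: every control message that reaches blkif_ctrlif_rx() is answered exactly once via ctrl_if_send_response(), and a malformed message is answered with its length forced to zero, so the sender can tell a parse error apart from a normal status reply. A hedged sketch of the sender-side check follows; send_and_wait() is a hypothetical stand-in for the real blocking-send primitive in ctrl_if.h, which this changeset does not show.

    /* Illustrative only: detecting the parse_error convention above.
     * send_and_wait() is hypothetical; the real control-interface
     * API lives in ctrl_if.h. */
    ctrl_msg_t msg = {
        .type    = CMSG_BLKIF_BE,
        .subtype = CMSG_BLKIF_BE_VBD_CREATE,
        .length  = sizeof(blkif_be_vbd_create_t),
    };
    send_and_wait(&msg);   /* the reply is written back into msg */
    if ( msg.length == 0 )
        printk("vbd create: request rejected as malformed\n");
    else if ( ((blkif_be_vbd_create_t *)msg.msg)->status != BLKIF_BE_STATUS_OKAY )
        printk("vbd create: backend returned an error\n");
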
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Mon Dec 27 13:35:05 2004 +0000
    12.3 @@ -0,0 +1,517 @@
    12.4 +/******************************************************************************
    12.5 + * blktap_datapath.c
    12.6 + * 
    12.7 + * XenLinux virtual block-device tap.
    12.8 + * Block request routing data path.
    12.9 + * 
   12.10 + * Copyright (c) 2004, Andrew Warfield
   12.11 + *
   12.12 + */
   12.13 + 
   12.14 +#include "blktap.h"
   12.15 +
   12.16 +/*-----[ The data paths ]-------------------------------------------------*/
   12.17 + 
   12.18 +/* Connections to the frontend domains.*/
   12.19 +blkif_t   ptfe_blkif; 
   12.20 + 
   12.21 +/* Connection to a single backend domain. */
   12.22 +blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
   12.23 +BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
   12.24 +BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
   12.25 +
   12.26 +/* Rings up to user space. */ 
    12.27 +blkif_req_ring_t fe_ring; /* = BLKIF_REQ_RING_INIT */
    12.28 +blkif_rsp_ring_t be_ring; /* = BLKIF_RSP_RING_INIT */
   12.29 +
   12.30 +/*-----[ Ring helpers ]---------------------------------------------------*/
   12.31 +
   12.32 +inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring)
   12.33 +{
   12.34 +    if (ring->type == BLKIF_REQ_RING_TYPE) {
   12.35 +        blkif_req_ring_t *r = (blkif_req_ring_t *)ring;
   12.36 +        return ( ( r->req_prod - r->rsp_cons ) == BLKIF_RING_SIZE );
   12.37 +    }
   12.38 +    
   12.39 +    /* for now assume that there is always room in the response path. */
   12.40 +    return 0;
   12.41 +}
   12.42 +
   12.43 +/*-----[ Tracking active requests ]---------------------------------------*/
   12.44 +
   12.45 +/* this must be the same as MAX_PENDING_REQS in blkback.c */
   12.46 +#define MAX_ACTIVE_REQS 64
   12.47 +
   12.48 +active_req_t  active_reqs[MAX_ACTIVE_REQS];
   12.49 +unsigned char active_req_ring[MAX_ACTIVE_REQS];
   12.50 +spinlock_t    active_req_lock = SPIN_LOCK_UNLOCKED;
   12.51 +typedef unsigned int ACTIVE_RING_IDX;
   12.52 +ACTIVE_RING_IDX active_prod, active_cons;
   12.53 +#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
    12.54 +#define ACTIVE_IDX(_ar) ((_ar) - active_reqs)
   12.55 +
   12.56 +inline active_req_t *get_active_req(void) 
   12.57 +{
   12.58 +    ASSERT(active_cons != active_prod);    
    12.59 +    return &active_reqs[active_req_ring[MASK_ACTIVE_IDX(active_cons++)]];
   12.60 +}
   12.61 +
   12.62 +inline void free_active_req(active_req_t *ar) 
   12.63 +{
   12.64 +    unsigned long flags;
   12.65 +        
   12.66 +    spin_lock_irqsave(&active_req_lock, flags);
   12.67 +    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
   12.68 +    spin_unlock_irqrestore(&active_req_lock, flags);
   12.69 +}
   12.70 +
   12.71 +inline void active_reqs_init(void)
   12.72 +{
   12.73 +    ACTIVE_RING_IDX i;
   12.74 +    
   12.75 +    active_cons = 0;
   12.76 +    active_prod = MAX_ACTIVE_REQS;
   12.77 +    memset(active_reqs, 0, sizeof(active_reqs));
   12.78 +    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
   12.79 +        active_req_ring[i] = i;
   12.80 +}
   12.81 +
   12.82 +/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
   12.83 +
   12.84 +irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
   12.85 +{
   12.86 +    /* we have pending messages from the real frontend. */
   12.87 +
   12.88 +    blkif_request_t *req_s, *req_d;
   12.89 +    BLKIF_RING_IDX fe_rp;
   12.90 +    unsigned long flags;
   12.91 +    int notify;
   12.92 +    unsigned long i;
   12.93 +    active_req_t *ar;
   12.94 +    
   12.95 +    DPRINTK("PT got FE interrupt.\n");
   12.96 +    
   12.97 +    /* lock both rings */
   12.98 +    spin_lock_irqsave(&blkif_io_lock, flags);
   12.99 +
  12.100 +    /* While there are REQUESTS on FERing: */
  12.101 +    fe_rp = ptfe_blkif.blk_ring_base->req_prod;
  12.102 +    rmb();
  12.103 +    notify = (ptfe_blkif.blk_req_cons != fe_rp);
  12.104 +
  12.105 +    for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) {
  12.106 +
  12.107 +        /* Get the next request */
  12.108 +        req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req;
  12.109 +        
  12.110 +        /* This is a new request:  
  12.111 +         * Assign an active request record, and remap the id. 
  12.112 +         */
  12.113 +        ar = get_active_req();
  12.114 +        ar->id = req_s->id;
  12.115 +        req_s->id = ACTIVE_IDX(ar);
  12.116 +        DPRINTK("%3lu < %3lu\n", req_s->id, ar->id);
  12.117 +
  12.118 +        /* FE -> BE interposition point is here. */
  12.119 +        
  12.120 +        /* ------------------------------------------------------------- */
  12.121 +        /* BLKIF_OP_PROBE_HACK:                                          */
   12.122 +        /* Until we have grant tables, we need to allow the backend to   */
   12.123 +        /* map pages that are either from this domain, or more commonly  */
   12.124 +        /* from the real front end.  We achieve this in a terrible way,  */
   12.125 +        /* by passing the front end's domid along with PROBE messages.   */
  12.126 +        /* Once grant tables appear, this should all go away.            */
  12.127 +
  12.128 +        if (req_s->operation == BLKIF_OP_PROBE) {
  12.129 +            DPRINTK("Adding FE domid to PROBE request.\n");
   12.130 +            *(domid_t *)&req_s->frame_and_sects[1] = ptfe_blkif.domid;
  12.131 +        }
  12.132 +
  12.133 +        /* ------------------------------------------------------------- */
  12.134 +
  12.135 +        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
  12.136 +        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  12.137 +             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
  12.138 +            
  12.139 +            /* Copy the response message to UFERing */
  12.140 +            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
  12.141 +            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
  12.142 +
  12.143 +            /* XXX: mapping/copying of attached pages is still not done! */
  12.144 +
  12.145 +            DPRINTK("req->UFERing\n"); 
  12.146 +            blktap_write_fe_ring(req_s);
  12.147 +
  12.148 +
  12.149 +        }
  12.150 +
  12.151 +        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
  12.152 +        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  12.153 +               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
  12.154 +            
   12.155 +            /* (be included to prevent noise from the fe when it's off) */
  12.156 +            /* copy the request message to the BERing */
  12.157 +
  12.158 +            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
  12.159 +                    (unsigned)MASK_BLKIF_IDX(i), 
  12.160 +                    (unsigned)MASK_BLKIF_IDX(ptbe_req_prod));
  12.161 +
  12.162 +            req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
  12.163 +            
  12.164 +            memcpy(req_d, req_s, sizeof(blkif_request_t));
  12.165 +
  12.166 +            ptbe_req_prod++;
  12.167 +        }
  12.168 +    }
  12.169 +
  12.170 +    ptfe_blkif.blk_req_cons = i;
  12.171 +
   12.172 +    /* If we have forwarded any requests, notify the appropriate ends. */
  12.173 +    if (notify) {
  12.174 +
  12.175 +        /* we have sent stuff to the be, notify it. */
  12.176 +        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  12.177 +               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
  12.178 +            wmb();
  12.179 +            blk_ptbe_ring->req_prod = ptbe_req_prod;
  12.180 +
  12.181 +            notify_via_evtchn(blkif_ptbe_evtchn);
  12.182 +            DPRINTK(" -- and notified.\n");
  12.183 +        }
  12.184 +
  12.185 +        /* we sent stuff to the app, notify it. */
  12.186 +        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  12.187 +             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
  12.188 +
  12.189 +            blktap_kick_user();
  12.190 +        }
  12.191 +    }
  12.192 +
  12.193 +    /* unlock rings */
  12.194 +    spin_unlock_irqrestore(&blkif_io_lock, flags);
  12.195 +
  12.196 +    return IRQ_HANDLED;
  12.197 +}
  12.198 +
  12.199 +inline int write_req_to_be_ring(blkif_request_t *req)
  12.200 +{
  12.201 +    blkif_request_t *req_d;
  12.202 +
  12.203 +    req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
  12.204 +    memcpy(req_d, req, sizeof(blkif_request_t));
  12.205 +    ptbe_req_prod++;
  12.206 +
  12.207 +    return 0;
  12.208 +}
  12.209 +
  12.210 +inline void kick_be_domain(void) {
  12.211 +    wmb();
  12.212 +    blk_ptbe_ring->req_prod = ptbe_req_prod;
  12.213 +    notify_via_evtchn(blkif_ptbe_evtchn);
  12.214 +}
  12.215 +
  12.216 +/*-----[ Data to/from Backend (server) VM ]------------------------------*/
  12.217 +
  12.218 +
  12.219 +irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
  12.220 +                                  struct pt_regs *ptregs)
  12.221 +{
  12.222 +    blkif_response_t  *resp_s, *resp_d;
  12.223 +    BLKIF_RING_IDX be_rp;
  12.224 +    unsigned long flags;
  12.225 +    int notify;
  12.226 +    unsigned long i;
  12.227 +    active_req_t *ar;
  12.228 +
  12.229 +    DPRINTK("PT got BE interrupt.\n");
  12.230 +
  12.231 +    /* lock both rings */
  12.232 +    spin_lock_irqsave(&blkif_io_lock, flags);
  12.233 +    
  12.234 +    /* While there are RESPONSES on BERing: */
  12.235 +    be_rp = blk_ptbe_ring->resp_prod;
  12.236 +    rmb();
  12.237 +    notify = (ptbe_resp_cons != be_rp);
  12.238 +    
  12.239 +    for ( i = ptbe_resp_cons; i != be_rp; i++ )
  12.240 +    {
  12.241 +        /* BE -> FE interposition point is here. */
  12.242 +        
  12.243 +        /* Get the next response */
  12.244 +        resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp;
  12.245 +    
  12.246 +       
  12.247 +        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
  12.248 +        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  12.249 +             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
  12.250 +
  12.251 +            /* Copy the response message to UBERing */
  12.252 +            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
  12.253 +            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
  12.254 +
  12.255 +            /* XXX: copy/map the attached page! */
  12.256 +
  12.257 +            DPRINTK("rsp->UBERing\n"); 
  12.258 +            blktap_write_be_ring(resp_s);
  12.259 +
  12.260 +        }
  12.261 +       
  12.262 +        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
  12.263 +        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  12.264 +               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
  12.265 +            
  12.266 +            /* (fe included to prevent random interference from the BE) */
  12.267 +            /* Copy the response message to FERing */
  12.268 +         
  12.269 +            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
  12.270 +                    (unsigned) MASK_BLKIF_IDX(i), 
  12.271 +                    (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod));
  12.272 +
  12.273 +            /* remap id, and free the active req. blkif lookup goes here too.*/
  12.274 +            ar = &active_reqs[resp_s->id];
  12.275 +            DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id);
  12.276 +            resp_s->id = ar->id;
  12.277 +            free_active_req(ar);
  12.278 +           
  12.279 +            resp_d = &ptfe_blkif.blk_ring_base->ring[
  12.280 +                MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
  12.281 +
  12.282 +            memcpy(resp_d, resp_s, sizeof(blkif_response_t));
  12.283 +            
  12.284 +            ptfe_blkif.blk_resp_prod++;
  12.285 +
  12.286 +        }
  12.287 +    }
  12.288 +
  12.289 +    ptbe_resp_cons = i;
  12.290 +    
   12.291 +    /* If we have forwarded any responses, notify the appropriate domains. */
  12.292 +    if (notify) {
  12.293 +
  12.294 +        /* we have sent stuff to the fe.  notify it. */
  12.295 +        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  12.296 +               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
  12.297 +            wmb();
  12.298 +            ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
  12.299 +        
  12.300 +            notify_via_evtchn(ptfe_blkif.evtchn);
  12.301 +            DPRINTK(" -- and notified.\n");
  12.302 +        }
  12.303 +
  12.304 +        /* we sent stuff to the app, notify it. */
  12.305 +        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  12.306 +             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
  12.307 +
  12.308 +            blktap_kick_user();
  12.309 +        }
  12.310 +    }
  12.311 +
  12.312 +    spin_unlock_irqrestore(&blkif_io_lock, flags);
  12.313 +    return IRQ_HANDLED;
  12.314 +}
  12.315 +
  12.316 +inline int write_resp_to_fe_ring(blkif_response_t *rsp)
  12.317 +{
  12.318 +    blkif_response_t *resp_d;
  12.319 +    active_req_t *ar;
  12.320 +    
  12.321 +    /* remap id, and free the active req. blkif lookup goes here too.*/
  12.322 +    ar = &active_reqs[rsp->id];
  12.323 +    DPRINTK("%3lu > %3lu\n", rsp->id, ar->id);
  12.324 +    rsp->id = ar->id;
  12.325 +    free_active_req(ar);
  12.326 +            
  12.327 +    resp_d = &ptfe_blkif.blk_ring_base->ring[
  12.328 +        MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
  12.329 +
  12.330 +    memcpy(resp_d, rsp, sizeof(blkif_response_t));
  12.331 +    ptfe_blkif.blk_resp_prod++;
  12.332 +
  12.333 +    return 0;
  12.334 +}
  12.335 +
  12.336 +inline void kick_fe_domain(void) {
  12.337 +    wmb();
  12.338 +    ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
  12.339 +    notify_via_evtchn(ptfe_blkif.evtchn);
  12.340 +    
  12.341 +}
  12.342 +
  12.343 +static inline void flush_requests(void)
  12.344 +{
  12.345 +    wmb(); /* Ensure that the frontend can see the requests. */
  12.346 +    blk_ptbe_ring->req_prod = ptbe_req_prod;
  12.347 +    notify_via_evtchn(blkif_ptbe_evtchn);
  12.348 +}
  12.349 +
  12.350 +/*-----[ Data to/from user space ]----------------------------------------*/
  12.351 +
  12.352 +
  12.353 +int blktap_write_fe_ring(blkif_request_t *req)
  12.354 +{
  12.355 +    blkif_request_t *target;
  12.356 +    int error, i;
  12.357 +
  12.358 +    /*
  12.359 +     * This is called to pass a request from the real frontend domain's
  12.360 +     * blkif ring to the character device.
  12.361 +     */
  12.362 +
  12.363 +    if ( ! blktap_ring_ok ) {
  12.364 +        DPRINTK("blktap: fe_ring not ready for a request!\n");
  12.365 +        return 0;
  12.366 +    }
  12.367 +
  12.368 +    if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) {
  12.369 +        DPRINTK("blktap: fe_ring is full, can't add.\n");
  12.370 +        return 0;
  12.371 +    }
  12.372 +
  12.373 +    target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req;
  12.374 +    memcpy(target, req, sizeof(*req));
  12.375 +
   12.376 +/* maybe move this stuff out into a separate func ------------------- */
  12.377 +
  12.378 +    /*
  12.379 +     * For now, map attached page into a fixed position into the vma.
  12.380 +     * XXX: make this map to a free page.
  12.381 +     */
  12.382 +
  12.383 +    /* Attempt to map the foreign pages directly in to the application */
  12.384 +    for (i=0; i<target->nr_segments; i++) {
  12.385 +
  12.386 +        /* get an unused virtual address from the char device */
  12.387 +        /* store the old page address */
  12.388 +        /* replace the address with the virtual address */
  12.389 +
  12.390 +        /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */
  12.391 +
  12.392 +        error = direct_remap_area_pages(blktap_vma->vm_mm, 
  12.393 +                                        MMAP_VADDR(req->id, i), 
  12.394 +                                        target->frame_and_sects[0] & PAGE_MASK,
  12.395 +                                        PAGE_SIZE,
  12.396 +                                        blktap_vma->vm_page_prot,
  12.397 +                                        ptfe_blkif.domid);
  12.398 +        if ( error != 0 ) {
  12.399 +            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
  12.400 +            return 0;
  12.401 +        }
  12.402 +    }
  12.403 +    /* fix the address of the attached page in the message. */
  12.404 +    /* TODO:      preserve the segment number stuff here... */
  12.405 +    /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/
  12.406 +/* ------------------------------------------------------------------ */
  12.407 +
  12.408 +    
  12.409 +    fe_ring.req_prod++;
  12.410 +
  12.411 +    return 0;
  12.412 +}
  12.413 +
  12.414 +int blktap_write_be_ring(blkif_response_t *rsp)
  12.415 +{
  12.416 +    blkif_response_t *target;
  12.417 +
  12.418 +    /*
  12.419 +     * This is called to pass a request from the real backend domain's
  12.420 +     * blkif ring to the character device.
  12.421 +     */
  12.422 +
  12.423 +    if ( ! blktap_ring_ok ) {
  12.424 +        DPRINTK("blktap: be_ring not ready for a request!\n");
  12.425 +        return 0;
  12.426 +    }
  12.427 +
  12.428 +    if ( BLKTAP_RING_FULL(RING(&be_ring)) ) {
  12.429 +        DPRINTK("blktap: be_ring is full, can't add.\n");
  12.430 +        return 0;
  12.431 +    }
  12.432 +
  12.433 +    target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp;
  12.434 +    memcpy(target, rsp, sizeof(*rsp));
  12.435 +
  12.436 +
  12.437 +    /* XXX: map attached pages and fix-up addresses in the copied address. */
  12.438 +
  12.439 +    be_ring.rsp_prod++;
  12.440 +
  12.441 +    return 0;
  12.442 +}
  12.443 +
  12.444 +int blktap_read_fe_ring(void)
  12.445 +{
  12.446 +    /* This is called to read responses from the UFE ring. */
  12.447 +
  12.448 +    BLKIF_RING_IDX fe_rp;
  12.449 +    unsigned long i;
  12.450 +    int notify;
  12.451 +
  12.452 +    DPRINTK("blktap_read_fe_ring()\n");
  12.453 +
  12.454 +    fe_rp = fe_ring.ring->resp_prod;
  12.455 +    rmb();
  12.456 +    notify = (fe_rp != fe_ring.rsp_cons);
  12.457 +
   12.458 +    /* if we are forwarding from the UFERing to the FERing */
  12.459 +    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
  12.460 +
  12.461 +        /* for each outstanding message on the UFEring  */
  12.462 +        for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) {
  12.463 +
  12.464 +            /* XXX: remap pages on that message as necessary */
   12.465 +            /* copy the message to the FERing */
  12.466 +
  12.467 +            DPRINTK("resp->fe_ring\n");
  12.468 +            write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp);
  12.469 +        }
  12.470 +    
  12.471 +        fe_ring.rsp_cons = fe_rp;
  12.472 +
  12.473 +        /* notify the fe if necessary */
  12.474 +        if ( notify ) {
  12.475 +            DPRINTK("kick_fe_domain()\n");
  12.476 +            kick_fe_domain();
  12.477 +        }
  12.478 +    }
  12.479 +
  12.480 +    return 0;
  12.481 +}
  12.482 +
  12.483 +int blktap_read_be_ring(void)
  12.484 +{
  12.485 +    /* This is called to read responses from the UBE ring. */
  12.486 +
  12.487 +    BLKIF_RING_IDX be_rp;
  12.488 +    unsigned long i;
  12.489 +    int notify;
  12.490 +
  12.491 +    DPRINTK("blktap_read_be_ring()\n");
  12.492 +
  12.493 +    be_rp = be_ring.ring->req_prod;
  12.494 +    rmb();
  12.495 +    notify = (be_rp != be_ring.req_cons);
  12.496 +
   12.497 +    /* if we are forwarding from the UBEring to the BERing */
   12.498 +    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
   12.499 +
   12.500 +        /* for each outstanding message on the UBEring  */
   12.501 +        for ( i = be_ring.req_cons; i != be_rp; i++ ) {
   12.502 +
   12.503 +            /* XXX: remap pages on that message as necessary */
   12.504 +            /* copy the message to the BERing */
  12.505 +
  12.506 +            DPRINTK("req->be_ring\n");
  12.507 +            write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req);
  12.508 +        }
  12.509 +    
  12.510 +        be_ring.req_cons = be_rp;
  12.511 +
   12.512 +        /* notify the be if necessary */
  12.513 +        if ( notify ) {
  12.514 +            DPRINTK("kick_be_domain()\n");
  12.515 +            kick_be_domain();
  12.516 +        }
  12.517 +    }
  12.518 +
  12.519 +    return 0;
  12.520 +}
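
Both interrupt handlers in this file follow the same single-producer, single-consumer ring discipline: the consumer snapshots the peer's public producer index and issues rmb() before touching any entry; the producer fills its slots, issues wmb(), and only then publishes its private index and kicks the peer's event channel. Folding write_req_to_be_ring() and kick_be_domain() together gives a minimal sketch of the producer half (all names come from this file):

    /* Sketch only: the publish side of the ring protocol used above. */
    static void produce_and_kick(blkif_request_t *req)
    {
        blkif_request_t *slot =
            &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;

        memcpy(slot, req, sizeof(*req));  /* fill the slot first...        */
        ptbe_req_prod++;                  /* ...advance the private index  */
        wmb();                            /* slot visible before the index */
        blk_ptbe_ring->req_prod = ptbe_req_prod;  /* publish to the BE     */
        notify_via_evtchn(blkif_ptbe_evtchn);     /* and wake it up        */
    }

Dropping either barrier lets the peer observe the new index before the slot contents and process a stale entry; the rmb()/wmb() pairs in blkif_ptfe_int() and blkif_ptbe_int() exist for exactly this reason.
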
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Mon Dec 27 13:35:05 2004 +0000
    13.3 @@ -0,0 +1,243 @@
    13.4 +/******************************************************************************
    13.5 + * blktap_userdev.c
    13.6 + * 
    13.7 + * XenLinux virtual block-device tap.
    13.8 + * Control interface between the driver and a character device.
    13.9 + * 
   13.10 + * Copyright (c) 2004, Andrew Warfield
   13.11 + *
   13.12 + */
   13.13 +
   13.14 +#include <linux/config.h>
   13.15 +#include <linux/module.h>
   13.16 +#include <linux/kernel.h>
   13.17 +#include <linux/fs.h>
   13.18 +#include <linux/mm.h>
   13.19 +#include <linux/miscdevice.h>
   13.20 +#include <linux/errno.h>
   13.21 +#include <linux/major.h>
   13.22 +#include <linux/gfp.h>
   13.23 +#include <linux/poll.h>
   13.24 +#include <asm/pgalloc.h>
   13.25 +
   13.26 +#include "blktap.h"
   13.27 +
   13.28 +
   13.29 +unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
   13.30 +
   13.31 +/* Only one process may open /dev/xen/blktap at any time. */
   13.32 +static unsigned long blktap_dev_inuse;
   13.33 +unsigned long blktap_ring_ok; /* make this ring->state */
   13.34 +
   13.35 +/* for poll: */
   13.36 +static wait_queue_head_t blktap_wait;
   13.37 +
   13.38 +/* Where things are inside the device mapping. */
   13.39 +struct vm_area_struct *blktap_vma;
   13.40 +unsigned long mmap_vstart;
   13.41 +unsigned long rings_vstart;
   13.42 +
   13.43 +/* -------[ blktap vm ops ]------------------------------------------- */
   13.44 +
   13.45 +static struct page *blktap_nopage(struct vm_area_struct *vma,
   13.46 +                                             unsigned long address,
   13.47 +                                             int *type)
   13.48 +{
   13.49 +    /*
   13.50 +     * if the page has not been mapped in by the driver then generate
   13.51 +     * a SIGBUS to the domain.
   13.52 +     */
   13.53 +
   13.54 +    force_sig(SIGBUS, current);
   13.55 +
    13.56 +    return NOPAGE_SIGBUS;
   13.57 +}
   13.58 +
   13.59 +struct vm_operations_struct blktap_vm_ops = {
   13.60 +    nopage:   blktap_nopage,
   13.61 +};
   13.62 +
   13.63 +/* -------[ blktap file ops ]----------------------------------------- */
   13.64 +
   13.65 +static int blktap_open(struct inode *inode, struct file *filp)
   13.66 +{
   13.67 +    if ( test_and_set_bit(0, &blktap_dev_inuse) )
   13.68 +        return -EBUSY;
   13.69 +
   13.70 +    printk(KERN_ALERT "blktap open.\n");
   13.71 +
   13.72 +    /* Allocate the fe ring. */
   13.73 +    fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
   13.74 +    if (fe_ring.ring == NULL)
   13.75 +        goto fail_nomem;
   13.76 +
   13.77 +    SetPageReserved(virt_to_page(fe_ring.ring));
   13.78 +    
   13.79 +    fe_ring.ring->req_prod = fe_ring.ring->resp_prod
   13.80 +                           = fe_ring.req_prod
   13.81 +                           = fe_ring.rsp_cons
   13.82 +                           = 0;
   13.83 +
   13.84 +    /* Allocate the be ring. */
   13.85 +    be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
   13.86 +    if (be_ring.ring == NULL)
   13.87 +        goto fail_free_fe;
   13.88 +
   13.89 +    SetPageReserved(virt_to_page(be_ring.ring));
   13.90 +    
   13.91 +    be_ring.ring->req_prod = be_ring.ring->resp_prod
   13.92 +                           = be_ring.rsp_prod
   13.93 +                           = be_ring.req_cons
   13.94 +                           = 0;
   13.95 +
   13.96 +    DPRINTK(KERN_ALERT "blktap open.\n");
   13.97 +
   13.98 +    return 0;
   13.99 +
  13.100 + fail_free_fe:
  13.101 +    free_page( (unsigned long) fe_ring.ring);
  13.102 +
  13.103 + fail_nomem:
  13.104 +    return -ENOMEM;
  13.105 +}
  13.106 +
  13.107 +static int blktap_release(struct inode *inode, struct file *filp)
  13.108 +{
  13.109 +    blktap_dev_inuse = 0;
  13.110 +    blktap_ring_ok = 0;
  13.111 +
  13.112 +    printk(KERN_ALERT "blktap closed.\n");
  13.113 +
  13.114 +    /* Free the ring page. */
  13.115 +    ClearPageReserved(virt_to_page(fe_ring.ring));
  13.116 +    free_page((unsigned long) fe_ring.ring);
  13.117 +
  13.118 +    ClearPageReserved(virt_to_page(be_ring.ring));
  13.119 +    free_page((unsigned long) be_ring.ring);
  13.120 +    
  13.121 +    return 0;
  13.122 +}
  13.123 +
  13.124 +static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
  13.125 +{
  13.126 +    int size;
  13.127 +
  13.128 +    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
  13.129 +           vma->vm_start, vma->vm_end);
  13.130 +
  13.131 +    vma->vm_ops = &blktap_vm_ops;
  13.132 +
  13.133 +    size = vma->vm_end - vma->vm_start;
  13.134 +    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
  13.135 +        printk(KERN_INFO 
  13.136 +               "blktap: you _must_ map exactly %d pages!\n",
  13.137 +               MMAP_PAGES + RING_PAGES);
  13.138 +        return -EAGAIN;
  13.139 +    }
  13.140 +
  13.141 +    size >>= PAGE_SHIFT;
  13.142 +    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
  13.143 +    
  13.144 +    rings_vstart = vma->vm_start;
  13.145 +    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
  13.146 +    
  13.147 +    /* Map the ring pages to the start of the region and reserve it. */
  13.148 +
  13.149 +    /* not sure if I really need to do this... */
  13.150 +    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  13.151 +
  13.152 +    DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring));
  13.153 +    if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE, 
  13.154 +                         vma->vm_page_prot)) {
  13.155 +        printk(KERN_ERR "be_ring: remap_page_range failure!\n");
  13.156 +    }
  13.157 +
  13.158 +    DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring));
  13.159 +    if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring), 
  13.160 +                         PAGE_SIZE, vma->vm_page_prot)) {
  13.161 +        printk(KERN_ERR "fe_ring: remap_page_range failure!\n");
  13.162 +    }
  13.163 +
  13.164 +    blktap_vma = vma;
  13.165 +    blktap_ring_ok = 1;
  13.166 +
  13.167 +    return 0;
  13.168 +}
  13.169 +
  13.170 +static int blktap_ioctl(struct inode *inode, struct file *filp,
  13.171 +                        unsigned int cmd, unsigned long arg)
  13.172 +{
  13.173 +    switch(cmd) {
  13.174 +    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
  13.175 +        return blktap_read_fe_ring();
  13.176 +
  13.177 +    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
  13.178 +        return blktap_read_be_ring();
  13.179 +
  13.180 +    case BLKTAP_IOCTL_SETMODE:
  13.181 +        if (BLKTAP_MODE_VALID(arg)) {
  13.182 +            blktap_mode = arg;
  13.183 +            /* XXX: may need to flush rings here. */
  13.184 +            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
  13.185 +            return 0;
  13.186 +        }
  13.187 +        /* XXX: return a more meaningful error case here. */
  13.188 +    }
  13.189 +    return -ENOIOCTLCMD;
  13.190 +}
  13.191 +
  13.192 +static unsigned int blktap_poll(struct file *file, poll_table *wait)
  13.193 +{
   13.194 +    poll_wait(file, &blktap_wait, wait);
   13.195 +
   13.196 +    if ( (fe_ring.req_prod != fe_ring.ring->req_prod) ||
   13.197 +         (be_ring.rsp_prod != be_ring.ring->resp_prod) ) {
   13.198 +
   13.199 +        fe_ring.ring->req_prod = fe_ring.req_prod;
   13.200 +        be_ring.ring->resp_prod = be_ring.rsp_prod;
   13.201 +        return POLLIN | POLLRDNORM;
   13.202 +    }
   13.203 +
   13.204 +    return 0;
  13.205 +}
  13.206 +
  13.207 +void blktap_kick_user(void)
  13.208 +{
  13.209 +    /* blktap_ring->req_prod = blktap_req_prod; */
  13.210 +    wake_up_interruptible(&blktap_wait);
  13.211 +}
  13.212 +
  13.213 +static struct file_operations blktap_fops = {
  13.214 +    owner:    THIS_MODULE,
  13.215 +    poll:     blktap_poll,
  13.216 +    ioctl:    blktap_ioctl,
  13.217 +    open:     blktap_open,
  13.218 +    release:  blktap_release,
  13.219 +    mmap:     blktap_mmap,
  13.220 +};
  13.221 +
  13.222 +/* -------[ blktap module setup ]------------------------------------- */
  13.223 +
  13.224 +static struct miscdevice blktap_miscdev = {
  13.225 +    .minor        = BLKTAP_MINOR,
  13.226 +    .name         = "blktap",
  13.227 +    .fops         = &blktap_fops,
  13.228 +    .devfs_name   = "misc/blktap",
  13.229 +};
  13.230 +
  13.231 +int blktap_init(void)
  13.232 +{
  13.233 +    int err;
  13.234 +
  13.235 +    err = misc_register(&blktap_miscdev);
  13.236 +    if ( err != 0 )
  13.237 +    {
  13.238 +        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
  13.239 +        return err;
  13.240 +    }
  13.241 +
  13.242 +    init_waitqueue_head(&blktap_wait);
  13.243 +
  13.244 +
  13.245 +    return 0;
  13.246 +}
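
The character device above implies a simple user-space loop: open the device (only one opener is permitted), mmap the two ring pages plus the data area as one region, sleep in poll() until blktap_kick_user() wakes the queue, then acknowledge with the KICK ioctls. A hedged sketch of such a client follows; TOTAL_PAGES must equal MMAP_PAGES + RING_PAGES from blktap.h (values not shown in this changeset), and all error handling is elided.

    /* Hedged user-space sketch; not part of this changeset. */
    #include <fcntl.h>
    #include <poll.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define TOTAL_PAGES (MMAP_PAGES + RING_PAGES) /* both from blktap.h */

    int main(void)
    {
        int fd = open("/dev/misc/blktap", O_RDWR);
        char *m = mmap(NULL, TOTAL_PAGES * 4096, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);
        /* Layout per blktap_mmap(): be ring page first, fe ring page
         * second, data pages after that. */
        void *be_ring = m, *fe_ring = m + 4096;
        struct pollfd pfd = { .fd = fd, .events = POLLIN };

        (void)be_ring; (void)fe_ring;
        for ( ; ; ) {
            poll(&pfd, 1, -1);                  /* blktap_kick_user() */
            /* ... inspect or rewrite ring entries here ... */
            ioctl(fd, BLKTAP_IOCTL_KICK_FE, 0); /* flush responses on */
            ioctl(fd, BLKTAP_IOCTL_KICK_BE, 0); /* flush requests on  */
        }
    }
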
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h	Mon Dec 27 13:35:05 2004 +0000
    14.3 @@ -0,0 +1,59 @@
    14.4 +/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
    14.5 + * which needs to alter them. */
    14.6 +
    14.7 +static inline void smpboot_clear_io_apic_irqs(void)
    14.8 +{
    14.9 +#if 1
   14.10 +	printk("smpboot_clear_io_apic_irqs\n");
   14.11 +#else
   14.12 +	io_apic_irqs = 0;
   14.13 +#endif
   14.14 +}
   14.15 +
   14.16 +static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
   14.17 +{
   14.18 +#if 1
   14.19 +	printk("smpboot_setup_warm_reset_vector\n");
   14.20 +#else
   14.21 +	CMOS_WRITE(0xa, 0xf);
   14.22 +	local_flush_tlb();
   14.23 +	Dprintk("1.\n");
   14.24 +	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
   14.25 +	Dprintk("2.\n");
   14.26 +	*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
   14.27 +	Dprintk("3.\n");
   14.28 +#endif
   14.29 +}
   14.30 +
   14.31 +static inline void smpboot_restore_warm_reset_vector(void)
   14.32 +{
   14.33 +	/*
   14.34 +	 * Install writable page 0 entry to set BIOS data area.
   14.35 +	 */
   14.36 +	local_flush_tlb();
   14.37 +
   14.38 +	/*
   14.39 +	 * Paranoid:  Set warm reset code and vector here back
   14.40 +	 * to default values.
   14.41 +	 */
   14.42 +	CMOS_WRITE(0, 0xf);
   14.43 +
   14.44 +	*((volatile long *) phys_to_virt(0x467)) = 0;
   14.45 +}
   14.46 +
   14.47 +static inline void smpboot_setup_io_apic(void)
   14.48 +{
   14.49 +#if 1
   14.50 +	printk("smpboot_setup_io_apic\n");
   14.51 +#else
   14.52 +	/*
   14.53 +	 * Here we can be sure that there is an IO-APIC in the system. Let's
   14.54 +	 * go and set it up:
   14.55 +	 */
   14.56 +	if (!skip_ioapic_setup && nr_ioapics)
   14.57 +		setup_IO_APIC();
   14.58 +#endif
   14.59 +}
   14.60 +
   14.61 +
   14.62 +#define	smp_found_config	(HYPERVISOR_shared_info->n_vcpu > 1)
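
Three of the four hooks above are stubbed down to a printk() because under Xen the hypervisor, not a BIOS warm-reset trampoline or an IO-APIC, brings secondary VCPUs online; smp_found_config is likewise redefined from the MP-table discovery flag to a test on the shared-info VCPU count. The effect on the generic smpboot path, paraphrased for illustration (the surrounding function is upstream code, not part of this changeset):

    /* Paraphrased sketch: with the #define above, this upstream-style
     * check now keys off the hypervisor's VCPU count rather than a
     * discovered MP configuration table. */
    if (!smp_found_config) {  /* i.e. HYPERVISOR_shared_info->n_vcpu <= 1 */
        printk(KERN_NOTICE "SMP motherboard not detected.\n");
        return;               /* boot as a uniprocessor guest */
    }
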
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/spinlock.h	Mon Dec 27 13:35:05 2004 +0000
    15.3 @@ -0,0 +1,224 @@
    15.4 +#ifndef __ASM_SPINLOCK_H
    15.5 +#define __ASM_SPINLOCK_H
    15.6 +
    15.7 +#include <asm/atomic.h>
    15.8 +#include <asm/rwlock.h>
    15.9 +#include <asm/page.h>
   15.10 +#include <linux/config.h>
   15.11 +#include <linux/compiler.h>
   15.12 +
   15.13 +asmlinkage int printk(const char * fmt, ...)
   15.14 +	__attribute__ ((format (printf, 1, 2)));
   15.15 +
   15.16 +/*
   15.17 + * Your basic SMP spinlocks, allowing only a single CPU anywhere
   15.18 + */
   15.19 +
   15.20 +typedef struct {
   15.21 +	volatile unsigned int lock;
   15.22 +#ifdef CONFIG_DEBUG_SPINLOCK
   15.23 +	unsigned magic;
   15.24 +#endif
   15.25 +} spinlock_t;
   15.26 +
   15.27 +#define SPINLOCK_MAGIC	0xdead4ead
   15.28 +
   15.29 +#ifdef CONFIG_DEBUG_SPINLOCK
   15.30 +#define SPINLOCK_MAGIC_INIT	, SPINLOCK_MAGIC
   15.31 +#else
   15.32 +#define SPINLOCK_MAGIC_INIT	/* */
   15.33 +#endif
   15.34 +
   15.35 +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
   15.36 +
   15.37 +#define spin_lock_init(x)	do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
   15.38 +
   15.39 +/*
   15.40 + * Simple spin lock operations.  There are two variants, one clears IRQ's
   15.41 + * on the local processor, one does not.
   15.42 + *
   15.43 + * We make no fairness assumptions. They have a cost.
   15.44 + */
   15.45 +
   15.46 +#define spin_is_locked(x)	(*(volatile signed char *)(&(x)->lock) <= 0)
   15.47 +#define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
   15.48 +
   15.49 +#define spin_lock_string \
   15.50 +	"\n1:\t" \
   15.51 +	"lock ; decb %0\n\t" \
   15.52 +	"jns 3f\n" \
   15.53 +	"2:\t" \
   15.54 +	"rep;nop\n\t" \
   15.55 +	"cmpb $0,%0\n\t" \
   15.56 +	"jle 2b\n\t" \
   15.57 +	"jmp 1b\n" \
   15.58 +	"3:\n\t"
   15.59 +
   15.60 +#define spin_lock_string_flags \
   15.61 +	"\n1:\t" \
   15.62 +	"lock ; decb %0\n\t" \
   15.63 +	"jns 4f\n\t" \
   15.64 +	"2:\t" \
   15.65 +	"testl $0x200, %1\n\t" \
   15.66 +	"jz 3f\n\t" \
   15.67 +	"#sti\n\t" \
   15.68 +	"3:\t" \
   15.69 +	"rep;nop\n\t" \
   15.70 +	"cmpb $0, %0\n\t" \
   15.71 +	"jle 3b\n\t" \
   15.72 +	"#cli\n\t" \
   15.73 +	"jmp 1b\n" \
   15.74 +	"4:\n\t"
   15.75 +
   15.76 +/*
   15.77 + * This works. Despite all the confusion.
   15.78 + * (except on PPro SMP or if we are using OOSTORE)
   15.79 + * (PPro errata 66, 92)
   15.80 + */
   15.81 + 
   15.82 +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
   15.83 +
   15.84 +#define spin_unlock_string \
   15.85 +	"movb $1,%0" \
   15.86 +		:"=m" (lock->lock) : : "memory"
   15.87 +
   15.88 +
   15.89 +static inline void _raw_spin_unlock(spinlock_t *lock)
   15.90 +{
   15.91 +#ifdef CONFIG_DEBUG_SPINLOCK
   15.92 +	BUG_ON(lock->magic != SPINLOCK_MAGIC);
   15.93 +	BUG_ON(!spin_is_locked(lock));
   15.94 +#endif
   15.95 +	__asm__ __volatile__(
   15.96 +		spin_unlock_string
   15.97 +	);
   15.98 +}
   15.99 +
  15.100 +#else
  15.101 +
  15.102 +#define spin_unlock_string \
  15.103 +	"xchgb %b0, %1" \
  15.104 +		:"=q" (oldval), "=m" (lock->lock) \
  15.105 +		:"0" (oldval) : "memory"
  15.106 +
  15.107 +static inline void _raw_spin_unlock(spinlock_t *lock)
  15.108 +{
  15.109 +	char oldval = 1;
  15.110 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.111 +	BUG_ON(lock->magic != SPINLOCK_MAGIC);
  15.112 +	BUG_ON(!spin_is_locked(lock));
  15.113 +#endif
  15.114 +	__asm__ __volatile__(
  15.115 +		spin_unlock_string
  15.116 +	);
  15.117 +}
  15.118 +
  15.119 +#endif
  15.120 +
  15.121 +static inline int _raw_spin_trylock(spinlock_t *lock)
  15.122 +{
  15.123 +	char oldval;
  15.124 +	__asm__ __volatile__(
  15.125 +		"xchgb %b0,%1"
  15.126 +		:"=q" (oldval), "=m" (lock->lock)
  15.127 +		:"0" (0) : "memory");
  15.128 +	return oldval > 0;
  15.129 +}
  15.130 +
  15.131 +static inline void _raw_spin_lock(spinlock_t *lock)
  15.132 +{
  15.133 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.134 +	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
  15.135 +		printk("eip: %p\n", __builtin_return_address(0));
  15.136 +		BUG();
  15.137 +	}
  15.138 +#endif
  15.139 +	__asm__ __volatile__(
  15.140 +		spin_lock_string
  15.141 +		:"=m" (lock->lock) : : "memory");
  15.142 +}
  15.143 +
  15.144 +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
  15.145 +{
  15.146 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.147 +	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
  15.148 +		printk("eip: %p\n", __builtin_return_address(0));
  15.149 +		BUG();
  15.150 +	}
  15.151 +#endif
  15.152 +	__asm__ __volatile__(
  15.153 +		spin_lock_string_flags
  15.154 +		:"=m" (lock->lock) : "r" (flags) : "memory");
  15.155 +}
  15.156 +
  15.157 +/*
  15.158 + * Read-write spinlocks, allowing multiple readers
  15.159 + * but only one writer.
  15.160 + *
  15.161 + * NOTE! it is quite common to have readers in interrupts
  15.162 + * but no interrupt writers. For those circumstances we
  15.163 + * can "mix" irq-safe locks - any writer needs to get a
  15.164 + * irq-safe write-lock, but readers can get non-irqsafe
  15.165 + * read-locks.
  15.166 + */
  15.167 +typedef struct {
  15.168 +	volatile unsigned int lock;
  15.169 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.170 +	unsigned magic;
  15.171 +#endif
  15.172 +} rwlock_t;
  15.173 +
  15.174 +#define RWLOCK_MAGIC	0xdeaf1eed
  15.175 +
  15.176 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.177 +#define RWLOCK_MAGIC_INIT	, RWLOCK_MAGIC
  15.178 +#else
  15.179 +#define RWLOCK_MAGIC_INIT	/* */
  15.180 +#endif
  15.181 +
  15.182 +#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
  15.183 +
  15.184 +#define rwlock_init(x)	do { *(x) = RW_LOCK_UNLOCKED; } while(0)
  15.185 +
  15.186 +#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
  15.187 +
  15.188 +/*
  15.189 + * On x86, we implement read-write locks as a 32-bit counter
  15.190 + * with the high bit (sign) being the "contended" bit.
  15.191 + *
  15.192 + * The inline assembly is non-obvious. Think about it.
  15.193 + *
  15.194 + * Changed to use the same technique as rw semaphores.  See
  15.195 + * semaphore.h for details.  -ben
  15.196 + */
  15.197 +/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
  15.198 +
  15.199 +static inline void _raw_read_lock(rwlock_t *rw)
  15.200 +{
  15.201 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.202 +	BUG_ON(rw->magic != RWLOCK_MAGIC);
  15.203 +#endif
  15.204 +	__build_read_lock(rw, "__read_lock_failed");
  15.205 +}
  15.206 +
  15.207 +static inline void _raw_write_lock(rwlock_t *rw)
  15.208 +{
  15.209 +#ifdef CONFIG_DEBUG_SPINLOCK
  15.210 +	BUG_ON(rw->magic != RWLOCK_MAGIC);
  15.211 +#endif
  15.212 +	__build_write_lock(rw, "__write_lock_failed");
  15.213 +}
  15.214 +
  15.215 +#define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
  15.216 +#define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
  15.217 +
  15.218 +static inline int _raw_write_trylock(rwlock_t *lock)
  15.219 +{
  15.220 +	atomic_t *count = (atomic_t *)lock;
  15.221 +	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
  15.222 +		return 1;
  15.223 +	atomic_add(RW_LOCK_BIAS, count);
  15.224 +	return 0;
  15.225 +}
  15.226 +
  15.227 +#endif /* __ASM_SPINLOCK_H */
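
For reference, the protocol implemented by spin_lock_string above: the lock byte starts at 1 (see SPIN_LOCK_UNLOCKED), "lock; decb" drops it to 0 on an uncontended acquire (so "jns" falls through), and a negative result means contention, in which case the waiter spins on plain reads ("rep;nop") until the byte goes positive and only then retries the atomic decrement. The same protocol restated in standalone C, using GCC __sync builtins purely as illustration (the assembly above predates them and relies on x86 store ordering for release):

    /* Illustrative only: the byte-lock protocol re-expressed with
     * GCC atomic builtins instead of the inline assembly above. */
    static inline void sketch_spin_lock(volatile signed char *lock)
    {
        for ( ; ; ) {
            if (__sync_fetch_and_sub(lock, 1) > 0) /* 'lock; decb' + 'jns' */
                return;                            /* old value 1: owned   */
            while (*lock <= 0)                     /* read-only spin, like */
                ;                                  /* 'rep;nop; cmpb; jle' */
        }
    }

    static inline void sketch_spin_unlock(volatile signed char *lock)
    {
        *lock = 1;  /* 'movb $1, %0'; release relies on x86 store order */
    }
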
    16.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c	Mon Dec 27 10:12:02 2004 +0000
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,599 +0,0 @@
    16.4 -/*
    16.5 - *	Intel SMP support routines.
    16.6 - *
    16.7 - *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
    16.8 - *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
    16.9 - *
   16.10 - *	This code is released under the GNU General Public License version 2 or
   16.11 - *	later.
   16.12 - */
   16.13 -
   16.14 -#include <linux/init.h>
   16.15 -
   16.16 -#include <linux/mm.h>
   16.17 -#include <linux/irq.h>
   16.18 -#include <linux/delay.h>
   16.19 -#include <linux/spinlock.h>
   16.20 -#include <linux/smp_lock.h>
   16.21 -#include <linux/kernel_stat.h>
   16.22 -#include <linux/mc146818rtc.h>
   16.23 -#include <linux/cache.h>
   16.24 -#include <linux/interrupt.h>
   16.25 -
   16.26 -#include <asm/mtrr.h>
   16.27 -#include <asm/tlbflush.h>
   16.28 -#if 0
   16.29 -#include <mach_apic.h>
   16.30 -#endif
   16.31 -#include <asm-xen/evtchn.h>
   16.32 -
   16.33 -#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
   16.34 -
   16.35 -/*
   16.36 - *	Some notes on x86 processor bugs affecting SMP operation:
   16.37 - *
   16.38 - *	Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
   16.39 - *	The Linux implications for SMP are handled as follows:
   16.40 - *
   16.41 - *	Pentium III / [Xeon]
   16.42 - *		None of the E1AP-E3AP errata are visible to the user.
   16.43 - *
   16.44 - *	E1AP.	see PII A1AP
   16.45 - *	E2AP.	see PII A2AP
   16.46 - *	E3AP.	see PII A3AP
   16.47 - *
   16.48 - *	Pentium II / [Xeon]
   16.49 - *		None of the A1AP-A3AP errata are visible to the user.
   16.50 - *
   16.51 - *	A1AP.	see PPro 1AP
   16.52 - *	A2AP.	see PPro 2AP
   16.53 - *	A3AP.	see PPro 7AP
   16.54 - *
   16.55 - *	Pentium Pro
   16.56 - *		None of 1AP-9AP errata are visible to the normal user,
   16.57 - *	except occasional delivery of 'spurious interrupt' as trap #15.
   16.58 - *	This is very rare and a non-problem.
   16.59 - *
   16.60 - *	1AP.	Linux maps APIC as non-cacheable
   16.61 - *	2AP.	worked around in hardware
   16.62 - *	3AP.	fixed in C0 and above steppings microcode update.
   16.63 - *		Linux does not use excessive STARTUP_IPIs.
   16.64 - *	4AP.	worked around in hardware
   16.65 - *	5AP.	symmetric IO mode (normal Linux operation) not affected.
   16.66 - *		'noapic' mode has vector 0xf filled out properly.
   16.67 - *	6AP.	'noapic' mode might be affected - fixed in later steppings
   16.68 - *	7AP.	We do not assume writes to the LVT deassering IRQs
   16.69 - *	8AP.	We do not enable low power mode (deep sleep) during MP bootup
   16.70 - *	9AP.	We do not use mixed mode
   16.71 - *
   16.72 - *	Pentium
   16.73 - *		There is a marginal case where REP MOVS on 100MHz SMP
   16.74 - *	machines with B stepping processors can fail. XXX should provide
   16.75 - *	an L1cache=Writethrough or L1cache=off option.
   16.76 - *
   16.77 - *		B stepping CPUs may hang. There are hardware work arounds
   16.78 - *	for this. We warn about it in case your board doesn't have the work
   16.79 - *	arounds. Basically thats so I can tell anyone with a B stepping
   16.80 - *	CPU and SMP problems "tough".
   16.81 - *
   16.82 - *	Specific items [From Pentium Processor Specification Update]
   16.83 - *
   16.84 - *	1AP.	Linux doesn't use remote read
   16.85 - *	2AP.	Linux doesn't trust APIC errors
   16.86 - *	3AP.	We work around this
   16.87 - *	4AP.	Linux never generated 3 interrupts of the same priority
   16.88 - *		to cause a lost local interrupt.
   16.89 - *	5AP.	Remote read is never used
   16.90 - *	6AP.	not affected - worked around in hardware
   16.91 - *	7AP.	not affected - worked around in hardware
   16.92 - *	8AP.	worked around in hardware - we get explicit CS errors if not
   16.93 - *	9AP.	only 'noapic' mode affected. Might generate spurious
   16.94 - *		interrupts, we log only the first one and count the
   16.95 - *		rest silently.
   16.96 - *	10AP.	not affected - worked around in hardware
   16.97 - *	11AP.	Linux reads the APIC between writes to avoid this, as per
   16.98 - *		the documentation. Make sure you preserve this as it affects
   16.99 - *		the C stepping chips too.
  16.100 - *	12AP.	not affected - worked around in hardware
  16.101 - *	13AP.	not affected - worked around in hardware
  16.102 - *	14AP.	we always deassert INIT during bootup
  16.103 - *	15AP.	not affected - worked around in hardware
  16.104 - *	16AP.	not affected - worked around in hardware
  16.105 - *	17AP.	not affected - worked around in hardware
  16.106 - *	18AP.	not affected - worked around in hardware
  16.107 - *	19AP.	not affected - worked around in BIOS
  16.108 - *
  16.109 - *	If this sounds worrying believe me these bugs are either ___RARE___,
  16.110 - *	or are signal timing bugs worked around in hardware and there's
  16.111 - *	about nothing of note with C stepping upwards.
  16.112 - */
  16.113 -
  16.114 -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
  16.115 -
  16.116 -/*
  16.117 - * the following functions deal with sending IPIs between CPUs.
  16.118 - *
  16.119 - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
  16.120 - */
  16.121 -
  16.122 -static inline int __prepare_ICR (unsigned int shortcut, int vector)
  16.123 -{
  16.124 -	return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
  16.125 -}
  16.126 -
  16.127 -static inline int __prepare_ICR2 (unsigned int mask)
  16.128 -{
  16.129 -	return SET_APIC_DEST_FIELD(mask);
  16.130 -}
  16.131 -
  16.132 -DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
  16.133 -
  16.134 -static inline void __send_IPI_one(unsigned int cpu, int vector)
  16.135 -{
  16.136 -	unsigned int evtchn;
  16.137 -
  16.138 -	evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
  16.139 -	// printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
  16.140 -	if (evtchn) {
  16.141 -#if 0
  16.142 -		shared_info_t *s = HYPERVISOR_shared_info;
  16.143 -		while (synch_test_bit(evtchn, &s->evtchn_pending[0]) ||
  16.144 -		       synch_test_bit(evtchn, &s->evtchn_mask[0]))
  16.145 -			;
  16.146 -#endif
  16.147 -		notify_via_evtchn(evtchn);
  16.148 -	} else
  16.149 -		printk("send_IPI to unbound port %d/%d",
  16.150 -		       cpu, vector);
  16.151 -}
  16.152 -
  16.153 -void __send_IPI_shortcut(unsigned int shortcut, int vector)
  16.154 -{
  16.155 -	int cpu;
  16.156 -
  16.157 -	switch (shortcut) {
  16.158 -	case APIC_DEST_SELF:
  16.159 -		__send_IPI_one(smp_processor_id(), vector);
  16.160 -		break;
  16.161 -	case APIC_DEST_ALLBUT:
  16.162 -		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
  16.163 -			if (cpu == smp_processor_id())
  16.164 -				continue;
  16.165 -			if (cpu_isset(cpu, cpu_online_map)) {
  16.166 -				__send_IPI_one(cpu, vector);
  16.167 -			}
  16.168 -		}
  16.169 -		break;
  16.170 -	default:
  16.171 -		printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
  16.172 -		       vector);
  16.173 -		break;
  16.174 -	}
  16.175 -}
  16.176 -
  16.177 -void fastcall send_IPI_self(int vector)
  16.178 -{
  16.179 -	__send_IPI_shortcut(APIC_DEST_SELF, vector);
  16.180 -}
  16.181 -
  16.182 -/*
  16.183 - * This is only used on smaller machines.
  16.184 - */
  16.185 -void send_IPI_mask_bitmask(cpumask_t mask, int vector)
  16.186 -{
  16.187 -	unsigned long flags;
  16.188 -	unsigned int cpu;
  16.189 -
  16.190 -	local_irq_save(flags);
  16.191 -
  16.192 -	for (cpu = 0; cpu < NR_CPUS; ++cpu) {
  16.193 -		if (cpu_isset(cpu, mask)) {
  16.194 -			__send_IPI_one(cpu, vector);
  16.195 -		}
  16.196 -	}
  16.197 -
  16.198 -	local_irq_restore(flags);
  16.199 -}
  16.200 -
  16.201 -inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
  16.202 -{
  16.203 -
  16.204 -	send_IPI_mask_bitmask(mask, vector);
  16.205 -}
  16.206 -
  16.207 -#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
  16.208 -
  16.209 -/*
  16.210 - *	Smarter SMP flushing macros. 
  16.211 - *		c/o Linus Torvalds.
  16.212 - *
  16.213 - *	These mean you can really definitely utterly forget about
  16.214 - *	writing to user space from interrupts. (Its not allowed anyway).
  16.215 - *
  16.216 - *	Optimizations Manfred Spraul <manfred@colorfullife.com>
  16.217 - */
  16.218 -
  16.219 -static cpumask_t flush_cpumask;
  16.220 -static struct mm_struct * flush_mm;
  16.221 -static unsigned long flush_va;
  16.222 -static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
  16.223 -#define FLUSH_ALL	0xffffffff
  16.224 -
  16.225 -/*
  16.226 - * We cannot call mmdrop() because we are in interrupt context, 
  16.227 - * instead update mm->cpu_vm_mask.
  16.228 - *
  16.229 - * We need to reload %cr3 since the page tables may be going
  16.230 - * away from under us..
  16.231 - */
  16.232 -static inline void leave_mm (unsigned long cpu)
  16.233 -{
  16.234 -	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
  16.235 -		BUG();
  16.236 -	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
  16.237 -	load_cr3(swapper_pg_dir);
  16.238 -}
  16.239 -
  16.240 -/*
  16.241 - *
  16.242 - * The flush IPI assumes that a thread switch happens in this order:
  16.243 - * [cpu0: the cpu that switches]
  16.244 - * 1) switch_mm() either 1a) or 1b)
  16.245 - * 1a) thread switch to a different mm
  16.246 - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
  16.247 - * 	Stop ipi delivery for the old mm. This is not synchronized with
  16.248 - * 	the other cpus, but smp_invalidate_interrupt ignore flush ipis
  16.249 - * 	for the wrong mm, and in the worst case we perform a superflous
  16.250 - * 	tlb flush.
  16.251 - * 1a2) set cpu_tlbstate to TLBSTATE_OK
  16.252 - * 	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
  16.253 - *	was in lazy tlb mode.
  16.254 - * 1a3) update cpu_tlbstate[].active_mm
  16.255 - * 	Now cpu0 accepts tlb flushes for the new mm.
  16.256 - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
  16.257 - * 	Now the other cpus will send tlb flush ipis.
  16.258 - * 1a4) change cr3.
  16.259 - * 1b) thread switch without mm change
  16.260 - *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
  16.261 - *	flush ipis.
  16.262 - * 1b1) set cpu_tlbstate to TLBSTATE_OK
  16.263 - * 1b2) test_and_set the cpu bit in cpu_vm_mask.
  16.264 - * 	Atomically set the bit [other cpus will start sending flush ipis],
  16.265 - * 	and test the bit.
  16.266 - * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
  16.267 - * 2) switch %%esp, ie current
  16.268 - *
  16.269 - * The interrupt must handle 2 special cases:
  16.270 - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
  16.271 - * - the cpu performs speculative tlb reads, i.e. even if the cpu only
  16.272 - *   runs in kernel space, the cpu could load tlb entries for user space
  16.273 - *   pages.
  16.274 - *
  16.275 - * The good news is that cpu_tlbstate is local to each cpu, no
  16.276 - * write/read ordering problems.
  16.277 - */
  16.278 -
  16.279 -/*
  16.280 - * TLB flush IPI:
  16.281 - *
  16.282 - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
  16.283 - * 2) Leave the mm if we are in the lazy tlb mode.
  16.284 - */
  16.285 -
  16.286 -irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
  16.287 -				     struct pt_regs *regs)
  16.288 -{
  16.289 -	unsigned long cpu;
  16.290 -
  16.291 -	cpu = get_cpu();
  16.292 -
  16.293 -	if (!cpu_isset(cpu, flush_cpumask))
  16.294 -		goto out;
  16.295 -		/* 
  16.296 -		 * This was a BUG() but until someone can quote me the
  16.297 -		 * line from the intel manual that guarantees an IPI to
  16.298 -		 * multiple CPUs is retried _only_ on the erroring CPUs
  16.299 -		 * its staying as a return
  16.300 -		 *
  16.301 -		 * BUG();
  16.302 -		 */
  16.303 -		 
  16.304 -	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
  16.305 -		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
  16.306 -			if (flush_va == FLUSH_ALL)
  16.307 -				local_flush_tlb();
  16.308 -			else
  16.309 -				__flush_tlb_one(flush_va);
  16.310 -		} else
  16.311 -			leave_mm(cpu);
  16.312 -	}
  16.313 -	smp_mb__before_clear_bit();
  16.314 -	cpu_clear(cpu, flush_cpumask);
  16.315 -	smp_mb__after_clear_bit();
  16.316 -out:
  16.317 -	put_cpu_no_resched();
  16.318 -
  16.319 -	return IRQ_HANDLED;
  16.320 -}
  16.321 -
  16.322 -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
  16.323 -						unsigned long va)
  16.324 -{
  16.325 -	cpumask_t tmp;
  16.326 -	/*
  16.327 -	 * A couple of (to be removed) sanity checks:
  16.328 -	 *
  16.329 -	 * - we do not send IPIs to not-yet booted CPUs.
  16.330 -	 * - current CPU must not be in mask
  16.331 -	 * - mask must exist :)
  16.332 -	 */
  16.333 -	BUG_ON(cpus_empty(cpumask));
  16.334 -
  16.335 -	cpus_and(tmp, cpumask, cpu_online_map);
  16.336 -	BUG_ON(!cpus_equal(cpumask, tmp));
  16.337 -	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
  16.338 -	BUG_ON(!mm);
  16.339 -
  16.340 -	/*
   16.341 -	 * I'm not happy about this global shared spinlock in the
  16.342 -	 * MM hot path, but we'll see how contended it is.
  16.343 -	 * Temporarily this turns IRQs off, so that lockups are
  16.344 -	 * detected by the NMI watchdog.
  16.345 -	 */
  16.346 -	spin_lock(&tlbstate_lock);
  16.347 -	
  16.348 -	flush_mm = mm;
  16.349 -	flush_va = va;
  16.350 -#if NR_CPUS <= BITS_PER_LONG
  16.351 -	atomic_set_mask(cpumask, &flush_cpumask);
  16.352 -#else
  16.353 -	{
  16.354 -		int k;
  16.355 -		unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
  16.356 -		unsigned long *cpu_mask = (unsigned long *)&cpumask;
  16.357 -		for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
  16.358 -			atomic_set_mask(cpu_mask[k], &flush_mask[k]);
  16.359 -	}
  16.360 -#endif
  16.361 -	/*
   16.362 -	 * We only have to send the IPI to the
   16.363 -	 * affected CPUs.
  16.364 -	 */
  16.365 -	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
  16.366 -
  16.367 -	while (!cpus_empty(flush_cpumask))
  16.368 -		/* nothing. lockup detection does not belong here */
  16.369 -		mb();
  16.370 -
  16.371 -	flush_mm = NULL;
  16.372 -	flush_va = 0;
  16.373 -	spin_unlock(&tlbstate_lock);
  16.374 -}
  16.375 -	
  16.376 -void flush_tlb_current_task(void)
  16.377 -{
  16.378 -	struct mm_struct *mm = current->mm;
  16.379 -	cpumask_t cpu_mask;
  16.380 -
  16.381 -	preempt_disable();
  16.382 -	cpu_mask = mm->cpu_vm_mask;
  16.383 -	cpu_clear(smp_processor_id(), cpu_mask);
  16.384 -
  16.385 -	local_flush_tlb();
  16.386 -	if (!cpus_empty(cpu_mask))
  16.387 -		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
  16.388 -	preempt_enable();
  16.389 -}
  16.390 -
  16.391 -void flush_tlb_mm (struct mm_struct * mm)
  16.392 -{
  16.393 -	cpumask_t cpu_mask;
  16.394 -
  16.395 -	preempt_disable();
  16.396 -	cpu_mask = mm->cpu_vm_mask;
  16.397 -	cpu_clear(smp_processor_id(), cpu_mask);
  16.398 -
  16.399 -	if (current->active_mm == mm) {
  16.400 -		if (current->mm)
  16.401 -			local_flush_tlb();
  16.402 -		else
  16.403 -			leave_mm(smp_processor_id());
  16.404 -	}
  16.405 -	if (!cpus_empty(cpu_mask))
  16.406 -		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
  16.407 -
  16.408 -	preempt_enable();
  16.409 -}
  16.410 -
  16.411 -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
  16.412 -{
  16.413 -	struct mm_struct *mm = vma->vm_mm;
  16.414 -	cpumask_t cpu_mask;
  16.415 -
  16.416 -	preempt_disable();
  16.417 -	cpu_mask = mm->cpu_vm_mask;
  16.418 -	cpu_clear(smp_processor_id(), cpu_mask);
  16.419 -
  16.420 -	if (current->active_mm == mm) {
  16.421 -		if(current->mm)
  16.422 -			__flush_tlb_one(va);
  16.423 -		else
  16.424 -		 	leave_mm(smp_processor_id());
  16.425 -	}
  16.426 -
  16.427 -	if (!cpus_empty(cpu_mask))
  16.428 -		flush_tlb_others(cpu_mask, mm, va);
  16.429 -
  16.430 -	preempt_enable();
  16.431 -}
  16.432 -
  16.433 -static void do_flush_tlb_all(void* info)
  16.434 -{
  16.435 -	unsigned long cpu = smp_processor_id();
  16.436 -
  16.437 -	__flush_tlb_all();
  16.438 -	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
  16.439 -		leave_mm(cpu);
  16.440 -}
  16.441 -
  16.442 -void flush_tlb_all(void)
  16.443 -{
  16.444 -	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
  16.445 -}
  16.446 -
  16.447 -/*
   16.448 - * This function sends a 'reschedule' IPI to another CPU.
   16.449 - * It goes straight through and wastes no time serializing
   16.450 - * anything. The worst case is that we lose a reschedule.
  16.451 - */
  16.452 -void smp_send_reschedule(int cpu)
  16.453 -{
  16.454 -	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
  16.455 -}
  16.456 -
  16.457 -/*
  16.458 - * Structure and data for smp_call_function(). This is designed to minimise
  16.459 - * static memory requirements. It also looks cleaner.
  16.460 - */
  16.461 -static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
  16.462 -
  16.463 -struct call_data_struct {
  16.464 -	void (*func) (void *info);
  16.465 -	void *info;
  16.466 -	atomic_t started;
  16.467 -	atomic_t finished;
  16.468 -	int wait;
  16.469 -};
  16.470 -
  16.471 -static struct call_data_struct * call_data;
  16.472 -
  16.473 -/*
   16.474 - * This function sends a 'generic call function' IPI to all other CPUs
  16.475 - * in the system.
  16.476 - */
  16.477 -
  16.478 -int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
  16.479 -			int wait)
  16.480 -/*
  16.481 - * [SUMMARY] Run a function on all other CPUs.
  16.482 - * <func> The function to run. This must be fast and non-blocking.
  16.483 - * <info> An arbitrary pointer to pass to the function.
  16.484 - * <nonatomic> currently unused.
  16.485 - * <wait> If true, wait (atomically) until function has completed on other CPUs.
  16.486 - * [RETURNS] 0 on success, else a negative status code. Does not return until
   16.487 - * remote CPUs are nearly ready to execute <<func>> or have already executed it.
  16.488 - *
  16.489 - * You must not call this function with disabled interrupts or from a
  16.490 - * hardware interrupt handler or from a bottom half handler.
  16.491 - */
  16.492 -{
  16.493 -	struct call_data_struct data;
  16.494 -	int cpus = num_online_cpus()-1;
  16.495 -
  16.496 -	if (!cpus)
  16.497 -		return 0;
  16.498 -
  16.499 -	/* Can deadlock when called with interrupts disabled */
  16.500 -	WARN_ON(irqs_disabled());
  16.501 -
  16.502 -	data.func = func;
  16.503 -	data.info = info;
  16.504 -	atomic_set(&data.started, 0);
  16.505 -	data.wait = wait;
  16.506 -	if (wait)
  16.507 -		atomic_set(&data.finished, 0);
  16.508 -
  16.509 -	spin_lock(&call_lock);
  16.510 -	call_data = &data;
  16.511 -	mb();
  16.512 -	
  16.513 -	/* Send a message to all other CPUs and wait for them to respond */
  16.514 -	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
  16.515 -
  16.516 -	/* Wait for response */
  16.517 -	while (atomic_read(&data.started) != cpus)
  16.518 -		barrier();
  16.519 -
  16.520 -	if (wait)
  16.521 -		while (atomic_read(&data.finished) != cpus)
  16.522 -			barrier();
  16.523 -	spin_unlock(&call_lock);
  16.524 -
  16.525 -	return 0;
  16.526 -}
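
A usage sketch (the per-CPU variable and both function names are hypothetical, not from this changeset): a caller that wants to reset a per-CPU counter on every processor could do the following, remembering that the callback runs in interrupt context and must not block.

    static DEFINE_PER_CPU(int, scratch_count);  /* hypothetical per-CPU datum */

    static void reset_scratch(void *info)
    {
        /* Runs on each remote CPU in interrupt context: keep it short. */
        __get_cpu_var(scratch_count) = 0;
    }

    void reset_all_scratch(void)
    {
        /* Fan out to all other CPUs and wait for them to finish... */
        smp_call_function(reset_scratch, NULL, 0, 1);
        /* ...then handle the local CPU directly. */
        reset_scratch(NULL);
    }
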
  16.527 -
  16.528 -static void stop_this_cpu (void * dummy)
  16.529 -{
  16.530 -	/*
  16.531 -	 * Remove this CPU:
  16.532 -	 */
  16.533 -	cpu_clear(smp_processor_id(), cpu_online_map);
  16.534 -	local_irq_disable();
  16.535 -#if 1
  16.536 -	xxprint("stop_this_cpu disable_local_APIC\n");
  16.537 -#else
  16.538 -	disable_local_APIC();
  16.539 -#endif
  16.540 -	if (cpu_data[smp_processor_id()].hlt_works_ok)
  16.541 -		for(;;) __asm__("hlt");
  16.542 -	for (;;);
  16.543 -}
  16.544 -
  16.545 -/*
   16.546 - * This function calls the 'stop' function on all other CPUs in the system.
  16.547 - */
  16.548 -
  16.549 -void smp_send_stop(void)
  16.550 -{
  16.551 -	smp_call_function(stop_this_cpu, NULL, 1, 0);
  16.552 -
  16.553 -	local_irq_disable();
  16.554 -#if 1
  16.555 -	xxprint("smp_send_stop disable_local_APIC\n");
  16.556 -#else
  16.557 -	disable_local_APIC();
  16.558 -#endif
  16.559 -	local_irq_enable();
  16.560 -}
  16.561 -
  16.562 -/*
   16.563 - * Reschedule callback. Nothing to do;
   16.564 - * all the work is done automatically when
  16.565 - * we return from the interrupt.
  16.566 - */
  16.567 -irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
  16.568 -				     struct pt_regs *regs)
  16.569 -{
  16.570 -
  16.571 -	return IRQ_HANDLED;
  16.572 -}
  16.573 -
  16.574 -#include <linux/kallsyms.h>
  16.575 -irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
  16.576 -					struct pt_regs *regs)
  16.577 -{
  16.578 -	void (*func) (void *info) = call_data->func;
  16.579 -	void *info = call_data->info;
  16.580 -	int wait = call_data->wait;
  16.581 -
  16.582 -	/*
  16.583 -	 * Notify initiating CPU that I've grabbed the data and am
   16.584 -	 * about to execute the function.
  16.585 -	 */
  16.586 -	mb();
  16.587 -	atomic_inc(&call_data->started);
  16.588 -	/*
  16.589 -	 * At this point the info structure may be out of scope unless wait==1
  16.590 -	 */
  16.591 -	irq_enter();
  16.592 -	(*func)(info);
  16.593 -	irq_exit();
  16.594 -
  16.595 -	if (wait) {
  16.596 -		mb();
  16.597 -		atomic_inc(&call_data->finished);
  16.598 -	}
  16.599 -
  16.600 -	return IRQ_HANDLED;
  16.601 -}
  16.602 -
    17.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c	Mon Dec 27 10:12:02 2004 +0000
    17.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.3 @@ -1,1364 +0,0 @@
    17.4 -/*
    17.5 - *	x86 SMP booting functions
    17.6 - *
    17.7 - *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
    17.8 - *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
    17.9 - *
   17.10 - *	Much of the core SMP work is based on previous work by Thomas Radke, to
   17.11 - *	whom a great many thanks are extended.
   17.12 - *
   17.13 - *	Thanks to Intel for making available several different Pentium,
   17.14 - *	Pentium Pro and Pentium-II/Xeon MP machines.
   17.15 - *	Original development of Linux SMP code supported by Caldera.
   17.16 - *
   17.17 - *	This code is released under the GNU General Public License version 2 or
   17.18 - *	later.
   17.19 - *
   17.20 - *	Fixes
   17.21 - *		Felix Koop	:	NR_CPUS used properly
   17.22 - *		Jose Renau	:	Handle single CPU case.
   17.23 - *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
   17.24 - *		Greg Wright	:	Fix for kernel stacks panic.
   17.25 - *		Erich Boleyn	:	MP v1.4 and additional changes.
   17.26 - *	Matthias Sattler	:	Changes for 2.1 kernel map.
   17.27 - *	Michel Lespinasse	:	Changes for 2.1 kernel map.
   17.28 - *	Michael Chastain	:	Change trampoline.S to gnu as.
   17.29 - *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
   17.30 - *		Ingo Molnar	:	Added APIC timers, based on code
   17.31 - *					from Jose Renau
   17.32 - *		Ingo Molnar	:	various cleanups and rewrites
   17.33 - *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
   17.34 - *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
   17.35 - *		Martin J. Bligh	: 	Added support for multi-quad systems
   17.36 - *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
   17.37 -*		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */
   17.38 -
   17.39 -#include <linux/module.h>
   17.40 -#include <linux/config.h>
   17.41 -#include <linux/init.h>
   17.42 -#include <linux/kernel.h>
   17.43 -
   17.44 -#include <linux/mm.h>
   17.45 -#include <linux/sched.h>
   17.46 -#include <linux/kernel_stat.h>
   17.47 -#include <linux/smp_lock.h>
   17.48 -#include <linux/irq.h>
   17.49 -#include <linux/bootmem.h>
   17.50 -
   17.51 -#include <linux/delay.h>
   17.52 -#include <linux/mc146818rtc.h>
   17.53 -#include <asm/tlbflush.h>
   17.54 -#include <asm/desc.h>
   17.55 -#include <asm/arch_hooks.h>
   17.56 -
   17.57 -#if 1
   17.58 -#define Dprintk(args...)
   17.59 -#else
   17.60 -#include <mach_apic.h>
   17.61 -#endif
   17.62 -#include <mach_wakecpu.h>
   17.63 -#include <smpboot_hooks.h>
   17.64 -
   17.65 -/* Set if we find a B stepping CPU */
   17.66 -static int __initdata smp_b_stepping;
   17.67 -
   17.68 -/* Number of siblings per CPU package */
   17.69 -int smp_num_siblings = 1;
   17.70 -int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
   17.71 -
   17.72 -/* bitmap of online cpus */
   17.73 -cpumask_t cpu_online_map;
   17.74 -
   17.75 -static cpumask_t cpu_callin_map;
   17.76 -cpumask_t cpu_callout_map;
   17.77 -static cpumask_t smp_commenced_mask;
   17.78 -
   17.79 -/* Per CPU bogomips and other parameters */
   17.80 -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
   17.81 -
   17.82 -u8 x86_cpu_to_apicid[NR_CPUS] =
   17.83 -			{ [0 ... NR_CPUS-1] = 0xff };
   17.84 -EXPORT_SYMBOL(x86_cpu_to_apicid);
   17.85 -
   17.86 -/* Set when the idlers are all forked */
   17.87 -int smp_threads_ready;
   17.88 -
   17.89 -#if 0
   17.90 -/*
   17.91 - * Trampoline 80x86 program as an array.
   17.92 - */
   17.93 -
   17.94 -extern unsigned char trampoline_data [];
   17.95 -extern unsigned char trampoline_end  [];
   17.96 -static unsigned char *trampoline_base;
   17.97 -static int trampoline_exec;
   17.98 -
   17.99 -/*
  17.100 - * Currently trivial. Write the real->protected mode
  17.101 - * bootstrap into the page concerned. The caller
  17.102 - * has made sure it's suitably aligned.
  17.103 - */
  17.104 -
  17.105 -static unsigned long __init setup_trampoline(void)
  17.106 -{
  17.107 -	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
  17.108 -	return virt_to_phys(trampoline_base);
  17.109 -}
  17.110 -#endif
  17.111 -
  17.112 -/*
  17.113 - * We are called very early to get the low memory for the
  17.114 - * SMP bootup trampoline page.
  17.115 - */
  17.116 -void __init smp_alloc_memory(void)
  17.117 -{
  17.118 -#if 1
  17.119 -	int cpu;
  17.120 -
  17.121 -	for (cpu = 1; cpu < NR_CPUS; cpu++) {
  17.122 -		cpu_gdt_descr[cpu].address = (unsigned long)
  17.123 -			alloc_bootmem_low_pages(PAGE_SIZE);
  17.124 -		/* XXX free unused pages later */
  17.125 -	}
  17.126 -#else
  17.127 -	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
  17.128 -	/*
  17.129 -	 * Has to be in very low memory so we can execute
  17.130 -	 * real-mode AP code.
  17.131 -	 */
  17.132 -	if (__pa(trampoline_base) >= 0x9F000)
  17.133 -		BUG();
  17.134 -	/*
  17.135 -	 * Make the SMP trampoline executable:
  17.136 -	 */
  17.137 -	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
  17.138 -#endif
  17.139 -}
  17.140 -
  17.141 -/*
  17.142 - * The bootstrap kernel entry code has set these up. Save them for
   17.143 - * a given CPU.
  17.144 - */
  17.145 -
  17.146 -static void __init smp_store_cpu_info(int id)
  17.147 -{
  17.148 -	struct cpuinfo_x86 *c = cpu_data + id;
  17.149 -
  17.150 -	*c = boot_cpu_data;
  17.151 -	if (id!=0)
  17.152 -		identify_cpu(c);
  17.153 -	/*
  17.154 -	 * Mask B, Pentium, but not Pentium MMX
  17.155 -	 */
  17.156 -	if (c->x86_vendor == X86_VENDOR_INTEL &&
  17.157 -	    c->x86 == 5 &&
  17.158 -	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
  17.159 -	    c->x86_model <= 3)
  17.160 -		/*
  17.161 -		 * Remember we have B step Pentia with bugs
  17.162 -		 */
  17.163 -		smp_b_stepping = 1;
  17.164 -
  17.165 -	/*
  17.166 -	 * Certain Athlons might work (for various values of 'work') in SMP
  17.167 -	 * but they are not certified as MP capable.
  17.168 -	 */
  17.169 -	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
  17.170 -
  17.171 -		/* Athlon 660/661 is valid. */	
  17.172 -		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
  17.173 -			goto valid_k7;
  17.174 -
  17.175 -		/* Duron 670 is valid */
  17.176 -		if ((c->x86_model==7) && (c->x86_mask==0))
  17.177 -			goto valid_k7;
  17.178 -
  17.179 -		/*
   17.180 -		 * Athlon 662, Duron 671, and Athlon > model 7 have the capability bit.
   17.181 -		 * It's worth noting that the A5 stepping (662) of some Athlon XPs
   17.182 -		 * has the MP bit set.
  17.183 -		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
  17.184 -		 */
  17.185 -		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
  17.186 -		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
  17.187 -		     (c->x86_model> 7))
  17.188 -			if (cpu_has_mp)
  17.189 -				goto valid_k7;
  17.190 -
  17.191 -		/* If we get here, it's not a certified SMP capable AMD system. */
  17.192 -		tainted |= TAINT_UNSAFE_SMP;
  17.193 -	}
  17.194 -
  17.195 -valid_k7:
  17.196 -	;
  17.197 -}
  17.198 -
  17.199 -#if 0
  17.200 -/*
  17.201 - * TSC synchronization.
  17.202 - *
  17.203 - * We first check whether all CPUs have their TSC's synchronized,
  17.204 - * then we print a warning if not, and always resync.
  17.205 - */
  17.206 -
  17.207 -static atomic_t tsc_start_flag = ATOMIC_INIT(0);
  17.208 -static atomic_t tsc_count_start = ATOMIC_INIT(0);
  17.209 -static atomic_t tsc_count_stop = ATOMIC_INIT(0);
  17.210 -static unsigned long long tsc_values[NR_CPUS];
  17.211 -
  17.212 -#define NR_LOOPS 5
  17.213 -
  17.214 -static void __init synchronize_tsc_bp (void)
  17.215 -{
  17.216 -	int i;
  17.217 -	unsigned long long t0;
  17.218 -	unsigned long long sum, avg;
  17.219 -	long long delta;
  17.220 -	unsigned long one_usec;
  17.221 -	int buggy = 0;
  17.222 -
  17.223 -	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
  17.224 -
  17.225 -	/* convert from kcyc/sec to cyc/usec */
  17.226 -	one_usec = cpu_khz / 1000;
  17.227 -
  17.228 -	atomic_set(&tsc_start_flag, 1);
  17.229 -	wmb();
  17.230 -
  17.231 -	/*
  17.232 -	 * We loop a few times to get a primed instruction cache,
  17.233 -	 * then the last pass is more or less synchronized and
  17.234 -	 * the BP and APs set their cycle counters to zero all at
  17.235 -	 * once. This reduces the chance of having random offsets
  17.236 -	 * between the processors, and guarantees that the maximum
  17.237 -	 * delay between the cycle counters is never bigger than
  17.238 -	 * the latency of information-passing (cachelines) between
  17.239 -	 * two CPUs.
  17.240 -	 */
  17.241 -	for (i = 0; i < NR_LOOPS; i++) {
  17.242 -		/*
  17.243 -		 * all APs synchronize but they loop on '== num_cpus'
  17.244 -		 */
  17.245 -		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
  17.246 -			mb();
  17.247 -		atomic_set(&tsc_count_stop, 0);
  17.248 -		wmb();
  17.249 -		/*
  17.250 -		 * this lets the APs save their current TSC:
  17.251 -		 */
  17.252 -		atomic_inc(&tsc_count_start);
  17.253 -
  17.254 -		rdtscll(tsc_values[smp_processor_id()]);
  17.255 -		/*
  17.256 -		 * We clear the TSC in the last loop:
  17.257 -		 */
  17.258 -		if (i == NR_LOOPS-1)
  17.259 -			write_tsc(0, 0);
  17.260 -
  17.261 -		/*
  17.262 -		 * Wait for all APs to leave the synchronization point:
  17.263 -		 */
  17.264 -		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
  17.265 -			mb();
  17.266 -		atomic_set(&tsc_count_start, 0);
  17.267 -		wmb();
  17.268 -		atomic_inc(&tsc_count_stop);
  17.269 -	}
  17.270 -
  17.271 -	sum = 0;
  17.272 -	for (i = 0; i < NR_CPUS; i++) {
  17.273 -		if (cpu_isset(i, cpu_callout_map)) {
  17.274 -			t0 = tsc_values[i];
  17.275 -			sum += t0;
  17.276 -		}
  17.277 -	}
  17.278 -	avg = sum;
  17.279 -	do_div(avg, num_booting_cpus());
  17.280 -
  17.281 -	sum = 0;
  17.282 -	for (i = 0; i < NR_CPUS; i++) {
  17.283 -		if (!cpu_isset(i, cpu_callout_map))
  17.284 -			continue;
  17.285 -		delta = tsc_values[i] - avg;
  17.286 -		if (delta < 0)
  17.287 -			delta = -delta;
  17.288 -		/*
  17.289 -		 * We report bigger than 2 microseconds clock differences.
  17.290 -		 */
  17.291 -		if (delta > 2*one_usec) {
  17.292 -			long realdelta;
  17.293 -			if (!buggy) {
  17.294 -				buggy = 1;
  17.295 -				printk("\n");
  17.296 -			}
  17.297 -			realdelta = delta;
  17.298 -			do_div(realdelta, one_usec);
  17.299 -			if (tsc_values[i] < avg)
  17.300 -				realdelta = -realdelta;
  17.301 -
  17.302 -			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
  17.303 -		}
  17.304 -
  17.305 -		sum += delta;
  17.306 -	}
  17.307 -	if (!buggy)
  17.308 -		printk("passed.\n");
  17.309 -}
  17.310 -
  17.311 -static void __init synchronize_tsc_ap (void)
  17.312 -{
  17.313 -	int i;
  17.314 -
  17.315 -	/*
  17.316 -	 * Not every cpu is online at the time
  17.317 -	 * this gets called, so we first wait for the BP to
  17.318 -	 * finish SMP initialization:
  17.319 -	 */
  17.320 -	while (!atomic_read(&tsc_start_flag)) mb();
  17.321 -
  17.322 -	for (i = 0; i < NR_LOOPS; i++) {
  17.323 -		atomic_inc(&tsc_count_start);
  17.324 -		while (atomic_read(&tsc_count_start) != num_booting_cpus())
  17.325 -			mb();
  17.326 -
  17.327 -		rdtscll(tsc_values[smp_processor_id()]);
  17.328 -		if (i == NR_LOOPS-1)
  17.329 -			write_tsc(0, 0);
  17.330 -
  17.331 -		atomic_inc(&tsc_count_stop);
  17.332 -		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
  17.333 -	}
  17.334 -}
  17.335 -#undef NR_LOOPS
  17.336 -#endif
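
The rendezvous in the (disabled) TSC code above is a two-counter barrier: everyone bumps tsc_count_start and spins until all have arrived, samples inside the synchronized window, then repeats with tsc_count_stop. A minimal user-space analogue (pthreads; purely illustrative, with the TSC read replaced by a placeholder) looks like this:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NCPUS    4
    #define NR_LOOPS 5

    static atomic_int count_start, count_stop;
    static unsigned long long values[NCPUS];

    static void *ap(void *arg)
    {
        int id = (int)(long)arg, i;

        for (i = 0; i < NR_LOOPS; i++) {
            atomic_fetch_add(&count_start, 1);
            while (atomic_load(&count_start) != NCPUS * (i + 1))
                ;                       /* spin until everyone has arrived */
            values[id] = i;             /* rdtscll() in the real code */
            atomic_fetch_add(&count_stop, 1);
            while (atomic_load(&count_stop) != NCPUS * (i + 1))
                ;                       /* spin until everyone has sampled */
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NCPUS];
        int i;

        for (i = 0; i < NCPUS; i++)
            pthread_create(&t[i], NULL, ap, (void *)(long)i);
        for (i = 0; i < NCPUS; i++)
            pthread_join(t[i], NULL);
        printf("completed %d synchronized loops\n", NR_LOOPS);
        return 0;
    }
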
  17.337 -
  17.338 -extern void calibrate_delay(void);
  17.339 -
  17.340 -static atomic_t init_deasserted;
  17.341 -
  17.342 -void __init smp_callin(void)
  17.343 -{
  17.344 -	int cpuid, phys_id;
  17.345 -	unsigned long timeout;
  17.346 -
  17.347 -#if 0
  17.348 -	/*
  17.349 -	 * If waken up by an INIT in an 82489DX configuration
  17.350 -	 * we may get here before an INIT-deassert IPI reaches
  17.351 -	 * our local APIC.  We have to wait for the IPI or we'll
  17.352 -	 * lock up on an APIC access.
  17.353 -	 */
  17.354 -	wait_for_init_deassert(&init_deasserted);
  17.355 -#endif
  17.356 -
  17.357 -	/*
  17.358 -	 * (This works even if the APIC is not enabled.)
  17.359 -	 */
  17.360 -	phys_id = smp_processor_id();
  17.361 -	cpuid = smp_processor_id();
  17.362 -	if (cpu_isset(cpuid, cpu_callin_map)) {
  17.363 -		printk("huh, phys CPU#%d, CPU#%d already present??\n",
  17.364 -					phys_id, cpuid);
  17.365 -		BUG();
  17.366 -	}
  17.367 -	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
  17.368 -
  17.369 -	/*
  17.370 -	 * STARTUP IPIs are fragile beasts as they might sometimes
  17.371 -	 * trigger some glue motherboard logic. Complete APIC bus
   17.372 -	 * silence for 1 second; this overestimates the time the
   17.373 -	 * boot CPU spends sending the up to 2 STARTUP IPIs
   17.374 -	 * by a factor of two. This should be enough.
  17.375 -	 */
  17.376 -
  17.377 -	/*
  17.378 -	 * Waiting 2s total for startup (udelay is not yet working)
  17.379 -	 */
  17.380 -	timeout = jiffies + 2*HZ;
  17.381 -	while (time_before(jiffies, timeout)) {
  17.382 -		/*
   17.383 -		 * Has the boot CPU finished its STARTUP sequence?
  17.384 -		 */
  17.385 -		if (cpu_isset(cpuid, cpu_callout_map))
  17.386 -			break;
  17.387 -		rep_nop();
  17.388 -	}
  17.389 -
  17.390 -	if (!time_before(jiffies, timeout)) {
  17.391 -		printk("BUG: CPU%d started up but did not get a callout!\n",
  17.392 -			cpuid);
  17.393 -		BUG();
  17.394 -	}
  17.395 -
  17.396 -#if 0
  17.397 -	/*
  17.398 -	 * the boot CPU has finished the init stage and is spinning
  17.399 -	 * on callin_map until we finish. We are free to set up this
  17.400 -	 * CPU, first the APIC. (this is probably redundant on most
  17.401 -	 * boards)
  17.402 -	 */
  17.403 -
  17.404 -	Dprintk("CALLIN, before setup_local_APIC().\n");
  17.405 -	smp_callin_clear_local_apic();
  17.406 -	setup_local_APIC();
  17.407 -#endif
  17.408 -	map_cpu_to_logical_apicid();
  17.409 -
  17.410 -	local_irq_enable();
  17.411 -
  17.412 -	/*
  17.413 -	 * Get our bogomips.
  17.414 -	 */
  17.415 -	calibrate_delay();
   17.416 -	Dprintk("Stack at about %p\n", &cpuid);
  17.417 -
  17.418 -	/*
  17.419 -	 * Save our processor parameters
  17.420 -	 */
  17.421 - 	smp_store_cpu_info(cpuid);
  17.422 -
  17.423 -#if 0
  17.424 -	disable_APIC_timer();
  17.425 -#endif
  17.426 -	local_irq_disable();
  17.427 -	/*
  17.428 -	 * Allow the master to continue.
  17.429 -	 */
  17.430 -	cpu_set(cpuid, cpu_callin_map);
  17.431 -
  17.432 -#if 0
  17.433 -	/*
  17.434 -	 *      Synchronize the TSC with the BP
  17.435 -	 */
  17.436 -	if (cpu_has_tsc && cpu_khz)
  17.437 -		synchronize_tsc_ap();
  17.438 -#endif
  17.439 -}
  17.440 -
  17.441 -int cpucount;
  17.442 -
  17.443 -extern int cpu_idle(void);
  17.444 -
  17.445 -
  17.446 -static irqreturn_t local_debug_interrupt(int irq, void *dev_id,
  17.447 -					 struct pt_regs *regs)
  17.448 -{
  17.449 -
  17.450 -	return IRQ_HANDLED;
  17.451 -}
  17.452 -
  17.453 -static struct irqaction local_irq_debug = {
  17.454 -	local_debug_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "ldebug",
  17.455 -	NULL, NULL
  17.456 -};
  17.457 -
  17.458 -void local_setup_debug(void)
  17.459 -{
  17.460 -	(void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &local_irq_debug);
  17.461 -}
  17.462 -
  17.463 -
  17.464 -extern void local_setup_timer(void);
  17.465 -
  17.466 -/*
  17.467 - * Activate a secondary processor.
  17.468 - */
  17.469 -int __init start_secondary(void *unused)
  17.470 -{
  17.471 -	/*
   17.472 -	 * Don't put anything before smp_callin(); SMP
   17.473 -	 * booting is so fragile that we want to limit the
   17.474 -	 * things done here to the bare minimum.
  17.475 -	 */
  17.476 -	cpu_init();
  17.477 -	smp_callin();
  17.478 -	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
  17.479 -		rep_nop();
  17.480 -	local_setup_timer();
  17.481 -	local_setup_debug();	/* XXX */
  17.482 -	smp_intr_init();
  17.483 -	local_irq_enable();
  17.484 -	/*
  17.485 -	 * low-memory mappings have been cleared, flush them from
  17.486 -	 * the local TLBs too.
  17.487 -	 */
  17.488 -	local_flush_tlb();
  17.489 -	cpu_set(smp_processor_id(), cpu_online_map);
  17.490 -	wmb();
  17.491 -	if (0) {
  17.492 -		char *msg2 = "delay2\n";
  17.493 -		int timeout;
  17.494 -		for (timeout = 0; timeout < 50000; timeout++) {
  17.495 -			udelay(1000);
  17.496 -			if (timeout == 2000) {
  17.497 -				(void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
  17.498 -				timeout = 0;
  17.499 -			}
  17.500 -		}
  17.501 -	}
  17.502 -	return cpu_idle();
  17.503 -}
  17.504 -
  17.505 -/*
  17.506 - * Everything has been set up for the secondary
  17.507 - * CPUs - they just need to reload everything
   17.508 - * from the task structure.
  17.509 - * This function must not return.
  17.510 - */
  17.511 -void __init initialize_secondary(void)
  17.512 -{
  17.513 -	/*
  17.514 -	 * We don't actually need to load the full TSS,
  17.515 -	 * basically just the stack pointer and the eip.
  17.516 -	 */
  17.517 -
  17.518 -	asm volatile(
  17.519 -		"movl %0,%%esp\n\t"
  17.520 -		"jmp *%1"
  17.521 -		:
  17.522 -		:"r" (current->thread.esp),"r" (current->thread.eip));
  17.523 -}
  17.524 -
  17.525 -extern struct {
  17.526 -	void * esp;
  17.527 -	unsigned short ss;
  17.528 -} stack_start;
  17.529 -
  17.530 -#ifdef CONFIG_NUMA
  17.531 -
  17.532 -/* which logical CPUs are on which nodes */
  17.533 -cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
  17.534 -				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
  17.535 -/* which node each logical CPU is on */
  17.536 -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
  17.537 -EXPORT_SYMBOL(cpu_2_node);
  17.538 -
  17.539 -/* set up a mapping between cpu and node. */
  17.540 -static inline void map_cpu_to_node(int cpu, int node)
  17.541 -{
  17.542 -	printk("Mapping cpu %d to node %d\n", cpu, node);
  17.543 -	cpu_set(cpu, node_2_cpu_mask[node]);
  17.544 -	cpu_2_node[cpu] = node;
  17.545 -}
  17.546 -
  17.547 -/* undo a mapping between cpu and node. */
  17.548 -static inline void unmap_cpu_to_node(int cpu)
  17.549 -{
  17.550 -	int node;
  17.551 -
  17.552 -	printk("Unmapping cpu %d from all nodes\n", cpu);
  17.553 -	for (node = 0; node < MAX_NUMNODES; node ++)
  17.554 -		cpu_clear(cpu, node_2_cpu_mask[node]);
  17.555 -	cpu_2_node[cpu] = 0;
  17.556 -}
  17.557 -#else /* !CONFIG_NUMA */
  17.558 -
  17.559 -#define map_cpu_to_node(cpu, node)	({})
  17.560 -#define unmap_cpu_to_node(cpu)	({})
  17.561 -
  17.562 -#endif /* CONFIG_NUMA */
  17.563 -
  17.564 -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
  17.565 -
  17.566 -void map_cpu_to_logical_apicid(void)
  17.567 -{
  17.568 -	int cpu = smp_processor_id();
  17.569 -	int apicid = smp_processor_id();
  17.570 -
  17.571 -	cpu_2_logical_apicid[cpu] = apicid;
  17.572 -	map_cpu_to_node(cpu, apicid_to_node(apicid));
  17.573 -}
  17.574 -
  17.575 -void unmap_cpu_to_logical_apicid(int cpu)
  17.576 -{
  17.577 -	cpu_2_logical_apicid[cpu] = BAD_APICID;
  17.578 -	unmap_cpu_to_node(cpu);
  17.579 -}
  17.580 -
  17.581 -#if APIC_DEBUG
  17.582 -static inline void __inquire_remote_apic(int apicid)
  17.583 -{
  17.584 -	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
  17.585 -	char *names[] = { "ID", "VERSION", "SPIV" };
  17.586 -	int timeout, status;
  17.587 -
  17.588 -	printk("Inquiring remote APIC #%d...\n", apicid);
  17.589 -
  17.590 -	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
  17.591 -		printk("... APIC #%d %s: ", apicid, names[i]);
  17.592 -
  17.593 -		/*
  17.594 -		 * Wait for idle.
  17.595 -		 */
  17.596 -		apic_wait_icr_idle();
  17.597 -
  17.598 -		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
  17.599 -		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
  17.600 -
  17.601 -		timeout = 0;
  17.602 -		do {
  17.603 -			udelay(100);
  17.604 -			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
  17.605 -		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
  17.606 -
  17.607 -		switch (status) {
  17.608 -		case APIC_ICR_RR_VALID:
  17.609 -			status = apic_read(APIC_RRR);
  17.610 -			printk("%08x\n", status);
  17.611 -			break;
  17.612 -		default:
  17.613 -			printk("failed\n");
  17.614 -		}
  17.615 -	}
  17.616 -}
  17.617 -#endif
  17.618 -
  17.619 -#if 0
  17.620 -#ifdef WAKE_SECONDARY_VIA_NMI
  17.621 -/* 
  17.622 - * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
  17.623 - * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
   17.624 - * won't ... remember to clear down the APIC, etc. later.
  17.625 - */
  17.626 -static int __init
  17.627 -wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
  17.628 -{
  17.629 -	unsigned long send_status = 0, accept_status = 0;
  17.630 -	int timeout, maxlvt;
  17.631 -
  17.632 -	/* Target chip */
  17.633 -	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
  17.634 -
  17.635 -	/* Boot on the stack */
  17.636 -	/* Kick the second */
  17.637 -	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
  17.638 -
  17.639 -	Dprintk("Waiting for send to finish...\n");
  17.640 -	timeout = 0;
  17.641 -	do {
  17.642 -		Dprintk("+");
  17.643 -		udelay(100);
  17.644 -		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.645 -	} while (send_status && (timeout++ < 1000));
  17.646 -
  17.647 -	/*
  17.648 -	 * Give the other CPU some time to accept the IPI.
  17.649 -	 */
  17.650 -	udelay(200);
  17.651 -	/*
  17.652 -	 * Due to the Pentium erratum 3AP.
  17.653 -	 */
  17.654 -	maxlvt = get_maxlvt();
  17.655 -	if (maxlvt > 3) {
  17.656 -		apic_read_around(APIC_SPIV);
  17.657 -		apic_write(APIC_ESR, 0);
  17.658 -	}
  17.659 -	accept_status = (apic_read(APIC_ESR) & 0xEF);
  17.660 -	Dprintk("NMI sent.\n");
  17.661 -
  17.662 -	if (send_status)
  17.663 -		printk("APIC never delivered???\n");
  17.664 -	if (accept_status)
  17.665 -		printk("APIC delivery error (%lx).\n", accept_status);
  17.666 -
  17.667 -	return (send_status | accept_status);
  17.668 -}
  17.669 -#endif	/* WAKE_SECONDARY_VIA_NMI */
  17.670 -
  17.671 -#ifdef WAKE_SECONDARY_VIA_INIT
  17.672 -static int __init
  17.673 -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
  17.674 -{
  17.675 -	unsigned long send_status = 0, accept_status = 0;
  17.676 -	int maxlvt, timeout, num_starts, j;
  17.677 -
  17.678 -	/*
  17.679 -	 * Be paranoid about clearing APIC errors.
  17.680 -	 */
  17.681 -	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
  17.682 -		apic_read_around(APIC_SPIV);
  17.683 -		apic_write(APIC_ESR, 0);
  17.684 -		apic_read(APIC_ESR);
  17.685 -	}
  17.686 -
  17.687 -	Dprintk("Asserting INIT.\n");
  17.688 -
  17.689 -	/*
  17.690 -	 * Turn INIT on target chip
  17.691 -	 */
  17.692 -	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
  17.693 -
  17.694 -	/*
  17.695 -	 * Send IPI
  17.696 -	 */
  17.697 -	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
  17.698 -				| APIC_DM_INIT);
  17.699 -
  17.700 -	Dprintk("Waiting for send to finish...\n");
  17.701 -	timeout = 0;
  17.702 -	do {
  17.703 -		Dprintk("+");
  17.704 -		udelay(100);
  17.705 -		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.706 -	} while (send_status && (timeout++ < 1000));
  17.707 -
  17.708 -	mdelay(10);
  17.709 -
  17.710 -	Dprintk("Deasserting INIT.\n");
  17.711 -
  17.712 -	/* Target chip */
  17.713 -	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
  17.714 -
  17.715 -	/* Send IPI */
  17.716 -	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
  17.717 -
  17.718 -	Dprintk("Waiting for send to finish...\n");
  17.719 -	timeout = 0;
  17.720 -	do {
  17.721 -		Dprintk("+");
  17.722 -		udelay(100);
  17.723 -		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.724 -	} while (send_status && (timeout++ < 1000));
  17.725 -
  17.726 -	atomic_set(&init_deasserted, 1);
  17.727 -
  17.728 -	/*
  17.729 -	 * Should we send STARTUP IPIs ?
  17.730 -	 *
  17.731 -	 * Determine this based on the APIC version.
  17.732 -	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
  17.733 -	 */
  17.734 -	if (APIC_INTEGRATED(apic_version[phys_apicid]))
  17.735 -		num_starts = 2;
  17.736 -	else
  17.737 -		num_starts = 0;
  17.738 -
  17.739 -	/*
  17.740 -	 * Run STARTUP IPI loop.
  17.741 -	 */
  17.742 -	Dprintk("#startup loops: %d.\n", num_starts);
  17.743 -
  17.744 -	maxlvt = get_maxlvt();
  17.745 -
  17.746 -	for (j = 1; j <= num_starts; j++) {
  17.747 -		Dprintk("Sending STARTUP #%d.\n",j);
  17.748 -		apic_read_around(APIC_SPIV);
  17.749 -		apic_write(APIC_ESR, 0);
  17.750 -		apic_read(APIC_ESR);
  17.751 -		Dprintk("After apic_write.\n");
  17.752 -
  17.753 -		/*
  17.754 -		 * STARTUP IPI
  17.755 -		 */
  17.756 -
  17.757 -		/* Target chip */
  17.758 -		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
  17.759 -
  17.760 -		/* Boot on the stack */
  17.761 -		/* Kick the second */
  17.762 -		apic_write_around(APIC_ICR, APIC_DM_STARTUP
  17.763 -					| (start_eip >> 12));
  17.764 -
  17.765 -		/*
  17.766 -		 * Give the other CPU some time to accept the IPI.
  17.767 -		 */
  17.768 -		udelay(300);
  17.769 -
  17.770 -		Dprintk("Startup point 1.\n");
  17.771 -
  17.772 -		Dprintk("Waiting for send to finish...\n");
  17.773 -		timeout = 0;
  17.774 -		do {
  17.775 -			Dprintk("+");
  17.776 -			udelay(100);
  17.777 -			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.778 -		} while (send_status && (timeout++ < 1000));
  17.779 -
  17.780 -		/*
  17.781 -		 * Give the other CPU some time to accept the IPI.
  17.782 -		 */
  17.783 -		udelay(200);
  17.784 -		/*
  17.785 -		 * Due to the Pentium erratum 3AP.
  17.786 -		 */
  17.787 -		if (maxlvt > 3) {
  17.788 -			apic_read_around(APIC_SPIV);
  17.789 -			apic_write(APIC_ESR, 0);
  17.790 -		}
  17.791 -		accept_status = (apic_read(APIC_ESR) & 0xEF);
  17.792 -		if (send_status || accept_status)
  17.793 -			break;
  17.794 -	}
  17.795 -	Dprintk("After Startup.\n");
  17.796 -
  17.797 -	if (send_status)
  17.798 -		printk("APIC never delivered???\n");
  17.799 -	if (accept_status)
  17.800 -		printk("APIC delivery error (%lx).\n", accept_status);
  17.801 -
  17.802 -	return (send_status | accept_status);
  17.803 -}
  17.804 -#endif	/* WAKE_SECONDARY_VIA_INIT */
  17.805 -#endif
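
For reference, the disabled native path above implements the Intel MP-spec INIT/STARTUP ("INIT-SIPI-SIPI") wake-up. Condensed from the code above, with error checking, APIC error-register handling, and the non-integrated-APIC case omitted:

    /* Assert INIT on the target, hold it, then deassert. */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
    mdelay(10);
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

    /* Up to two STARTUP IPIs carrying the page number of the entry point. */
    for (j = 1; j <= num_starts; j++) {
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
        apic_write_around(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
        udelay(300);
    }
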
  17.806 -
  17.807 -extern cpumask_t cpu_initialized;
  17.808 -
  17.809 -static int __init do_boot_cpu(int apicid)
  17.810 -/*
  17.811 - * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
   17.812 - * (i.e. clustered apic addressing mode), this is a LOGICAL apic ID.
  17.813 - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
  17.814 - */
  17.815 -{
  17.816 -	struct task_struct *idle;
  17.817 -	unsigned long boot_error;
  17.818 -	int timeout, cpu;
  17.819 -	unsigned long start_eip;
  17.820 -#if 0
  17.821 -	unsigned short nmi_high = 0, nmi_low = 0;
  17.822 -#endif
  17.823 -	full_execution_context_t ctxt;
  17.824 -	extern void startup_32_smp(void);
  17.825 -	extern void hypervisor_callback(void);
  17.826 -	extern void failsafe_callback(void);
  17.827 -	extern int smp_trap_init(trap_info_t *);
  17.828 -	int i;
  17.829 -
  17.830 -	cpu = ++cpucount;
  17.831 -	/*
   17.832 -	 * We can't use kernel_thread since we must avoid
   17.833 -	 * rescheduling the child.
  17.834 -	 */
  17.835 -	idle = fork_idle(cpu);
  17.836 -	if (IS_ERR(idle))
  17.837 -		panic("failed fork for CPU %d", cpu);
  17.838 -	idle->thread.eip = (unsigned long) start_secondary;
  17.839 -	/* start_eip had better be page-aligned! */
  17.840 -	start_eip = (unsigned long)startup_32_smp;
  17.841 -
  17.842 -	/* So we see what's up   */
  17.843 -	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
  17.844 -	/* Stack for startup_32 can be just as for start_secondary onwards */
  17.845 -	stack_start.esp = (void *) idle->thread.esp;
  17.846 -
  17.847 -	irq_ctx_init(cpu);
  17.848 -
  17.849 -	/*
  17.850 -	 * This grunge runs the startup process for
  17.851 -	 * the targeted processor.
  17.852 -	 */
  17.853 -
  17.854 -	atomic_set(&init_deasserted, 0);
  17.855 -
  17.856 -#if 1
  17.857 -	if (cpu_gdt_descr[0].size > PAGE_SIZE)
  17.858 -		BUG();
  17.859 -	cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
  17.860 -	memcpy((void *)cpu_gdt_descr[cpu].address,
  17.861 -	       (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
   17.862 -	memset((char *)cpu_gdt_descr[cpu].address +
   17.863 -	       FIRST_RESERVED_GDT_ENTRY * 8, 0,
   17.864 -	       NR_RESERVED_GDT_ENTRIES * 8);
  17.865 -
  17.866 -	memset(&ctxt, 0, sizeof(ctxt));
  17.867 -
  17.868 -	ctxt.cpu_ctxt.ds = __USER_DS;
  17.869 -	ctxt.cpu_ctxt.es = __USER_DS;
  17.870 -	ctxt.cpu_ctxt.fs = 0;
  17.871 -	ctxt.cpu_ctxt.gs = 0;
  17.872 -	ctxt.cpu_ctxt.ss = __KERNEL_DS;
  17.873 -	ctxt.cpu_ctxt.cs = __KERNEL_CS;
  17.874 -	ctxt.cpu_ctxt.eip = start_eip;
  17.875 -	ctxt.cpu_ctxt.esp = idle->thread.esp;
  17.876 -	ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
  17.877 -
  17.878 -	/* FPU is set up to default initial state. */
  17.879 -	memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
  17.880 -
  17.881 -	/* Virtual IDT is empty at start-of-day. */
  17.882 -	for ( i = 0; i < 256; i++ )
  17.883 -	{
  17.884 -		ctxt.trap_ctxt[i].vector = i;
  17.885 -		ctxt.trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
  17.886 -	}
  17.887 -	ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
  17.888 -
  17.889 -	/* No LDT. */
  17.890 -	ctxt.ldt_ents = 0;
  17.891 -
  17.892 -	{
  17.893 -		unsigned long va;
  17.894 -		int f;
  17.895 -
  17.896 -		for (va = cpu_gdt_descr[cpu].address, f = 0;
  17.897 -		     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
  17.898 -		     va += PAGE_SIZE, f++) {
  17.899 -			ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
  17.900 -			make_page_readonly((void *)va);
  17.901 -		}
  17.902 -		ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
  17.903 -		flush_page_update_queue();
  17.904 -	}
  17.905 -
  17.906 -	/* Ring 1 stack is the initial stack. */
  17.907 -	ctxt.guestos_ss  = __KERNEL_DS;
  17.908 -	ctxt.guestos_esp = idle->thread.esp;
  17.909 -
  17.910 -	/* Callback handlers. */
  17.911 -	ctxt.event_callback_cs     = __KERNEL_CS;
  17.912 -	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
  17.913 -	ctxt.failsafe_callback_cs  = __KERNEL_CS;
  17.914 -	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
  17.915 -
  17.916 -	ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
  17.917 -
  17.918 -	boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
  17.919 -
  17.920 -	if (!boot_error) {
  17.921 -		/*
  17.922 -		 * allow APs to start initializing.
  17.923 -		 */
  17.924 -		Dprintk("Before Callout %d.\n", cpu);
  17.925 -		cpu_set(cpu, cpu_callout_map);
  17.926 -		Dprintk("After Callout %d.\n", cpu);
  17.927 -
  17.928 -		/*
  17.929 -		 * Wait 5s total for a response
  17.930 -		 */
  17.931 -		for (timeout = 0; timeout < 50000; timeout++) {
  17.932 -			if (cpu_isset(cpu, cpu_callin_map))
  17.933 -				break;	/* It has booted */
  17.934 -			udelay(100);
  17.935 -		}
  17.936 -
  17.937 -		if (cpu_isset(cpu, cpu_callin_map)) {
  17.938 -			/* number CPUs logically, starting from 1 (BSP is 0) */
  17.939 -			Dprintk("OK.\n");
  17.940 -			printk("CPU%d: ", cpu);
  17.941 -			print_cpu_info(&cpu_data[cpu]);
  17.942 -			Dprintk("CPU has booted.\n");
  17.943 -		} else {
   17.944 -			boot_error = 1;
  17.945 -		}
  17.946 -	}
  17.947 -	x86_cpu_to_apicid[cpu] = apicid;
  17.948 -	if (boot_error) {
  17.949 -		/* Try to put things back the way they were before ... */
  17.950 -		unmap_cpu_to_logical_apicid(cpu);
  17.951 -		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
  17.952 -		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
  17.953 -		cpucount--;
  17.954 -	}
  17.955 -
  17.956 -#else
  17.957 -	Dprintk("Setting warm reset code and vector.\n");
  17.958 -
  17.959 -	store_NMI_vector(&nmi_high, &nmi_low);
  17.960 -
  17.961 -	smpboot_setup_warm_reset_vector(start_eip);
  17.962 -
  17.963 -	/*
  17.964 -	 * Starting actual IPI sequence...
  17.965 -	 */
  17.966 -	boot_error = wakeup_secondary_cpu(apicid, start_eip);
  17.967 -
  17.968 -	if (!boot_error) {
  17.969 -		/*
  17.970 -		 * allow APs to start initializing.
  17.971 -		 */
  17.972 -		Dprintk("Before Callout %d.\n", cpu);
  17.973 -		cpu_set(cpu, cpu_callout_map);
  17.974 -		Dprintk("After Callout %d.\n", cpu);
  17.975 -
  17.976 -		/*
  17.977 -		 * Wait 5s total for a response
  17.978 -		 */
  17.979 -		for (timeout = 0; timeout < 50000; timeout++) {
  17.980 -			if (cpu_isset(cpu, cpu_callin_map))
  17.981 -				break;	/* It has booted */
  17.982 -			udelay(100);
  17.983 -		}
  17.984 -
  17.985 -		if (cpu_isset(cpu, cpu_callin_map)) {
  17.986 -			/* number CPUs logically, starting from 1 (BSP is 0) */
  17.987 -			Dprintk("OK.\n");
  17.988 -			printk("CPU%d: ", cpu);
  17.989 -			print_cpu_info(&cpu_data[cpu]);
  17.990 -			Dprintk("CPU has booted.\n");
  17.991 -		} else {
   17.992 -			boot_error = 1;
  17.993 -			if (*((volatile unsigned char *)trampoline_base)
  17.994 -					== 0xA5)
  17.995 -				/* trampoline started but...? */
  17.996 -				printk("Stuck ??\n");
  17.997 -			else
  17.998 -				/* trampoline code not run */
  17.999 -				printk("Not responding.\n");
 17.1000 -			inquire_remote_apic(apicid);
 17.1001 -		}
 17.1002 -	}
 17.1003 -	x86_cpu_to_apicid[cpu] = apicid;
 17.1004 -	if (boot_error) {
 17.1005 -		/* Try to put things back the way they were before ... */
 17.1006 -		unmap_cpu_to_logical_apicid(cpu);
 17.1007 -		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 17.1008 -		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
 17.1009 -		cpucount--;
 17.1010 -	}
 17.1011 -
 17.1012 -	/* mark "stuck" area as not stuck */
 17.1013 -	*((volatile unsigned long *)trampoline_base) = 0;
 17.1014 -#endif
 17.1015 -
 17.1016 -	return boot_error;
 17.1017 -}
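
In outline, the Xen path through do_boot_cpu() replaces the APIC wake-up with a single hypercall: clone the boot GDT for the new CPU, fill in a full_execution_context_t (segment registers, entry eip/esp, virtual trap table, read-only GDT frames, event and failsafe callbacks, and the page-table base), and hand it to the hypervisor. A condensed restatement of the code above, not a separate implementation:

    memset(&ctxt, 0, sizeof(ctxt));
    ctxt.cpu_ctxt.cs  = __KERNEL_CS;
    ctxt.cpu_ctxt.eip = (unsigned long)startup_32_smp;  /* entry point */
    ctxt.cpu_ctxt.esp = idle->thread.esp;               /* idle task's stack */
    /* ... trap table, GDT frames, ring-1 stack, callback handlers ... */
    ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
    boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);      /* hypervisor starts the VCPU */
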
 17.1018 -
 17.1019 -cycles_t cacheflush_time;
 17.1020 -unsigned long cache_decay_ticks;
 17.1021 -
 17.1022 -static void smp_tune_scheduling (void)
 17.1023 -{
 17.1024 -	unsigned long cachesize;       /* kB   */
 17.1025 -	unsigned long bandwidth = 350; /* MB/s */
 17.1026 -	/*
 17.1027 -	 * Rough estimation for SMP scheduling, this is the number of
 17.1028 -	 * cycles it takes for a fully memory-limited process to flush
 17.1029 -	 * the SMP-local cache.
 17.1030 -	 *
 17.1031 -	 * (For a P5 this pretty much means we will choose another idle
 17.1032 -	 *  CPU almost always at wakeup time (this is due to the small
 17.1033 -	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
 17.1034 -	 *  the cache size)
 17.1035 -	 */
 17.1036 -
 17.1037 -	if (!cpu_khz) {
 17.1038 -		/*
 17.1039 -		 * this basically disables processor-affinity
 17.1040 -		 * scheduling on SMP without a TSC.
 17.1041 -		 */
 17.1042 -		cacheflush_time = 0;
 17.1043 -		return;
 17.1044 -	} else {
 17.1045 -		cachesize = boot_cpu_data.x86_cache_size;
 17.1046 -		if (cachesize == -1) {
 17.1047 -			cachesize = 16; /* Pentiums, 2x8kB cache */
 17.1048 -			bandwidth = 100;
 17.1049 -		}
 17.1050 -
 17.1051 -		cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
 17.1052 -	}
 17.1053 -
 17.1054 -	cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
 17.1055 -
 17.1056 -	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
 17.1057 -		(long)cacheflush_time/(cpu_khz/1000),
 17.1058 -		((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
 17.1059 -	printk("task migration cache decay timeout: %ld msecs.\n",
 17.1060 -		cache_decay_ticks);
 17.1061 -}
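
To make the heuristic concrete (assumed numbers, not from this changeset): for a 500 MHz CPU (cpu_khz = 500000) with a 256 kB cache and the default 350 MB/s bandwidth figure:

    unsigned long cpu_khz = 500000, cachesize = 256, bandwidth = 350;
    unsigned long cacheflush_time =
        (cpu_khz >> 10) * (cachesize << 10) / bandwidth;
    /* (500000 >> 10) = 488; 488 * 262144 / 350 = 365503 cycles,
     * i.e. roughly 0.73 ms at 500 MHz, so cache_decay_ticks becomes
     * 365503 / 500000 + 1 = 1 msec. */
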
 17.1062 -
 17.1063 -/*
 17.1064 - * Cycle through the processors sending APIC IPIs to boot each.
 17.1065 - */
 17.1066 -
 17.1067 -#if 0
 17.1068 -static int boot_cpu_logical_apicid;
 17.1069 -#endif
 17.1070 -/* Where the IO area was mapped on multiquad, always 0 otherwise */
 17.1071 -void *xquad_portio;
 17.1072 -
 17.1073 -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 17.1074 -
 17.1075 -static void __init smp_boot_cpus(unsigned int max_cpus)
 17.1076 -{
 17.1077 -	int cpu, kicked;
 17.1078 -	unsigned long bogosum = 0;
 17.1079 -#if 0
 17.1080 -	int apicid, bit;
 17.1081 -#endif
 17.1082 -
 17.1083 -	/*
 17.1084 -	 * Setup boot CPU information
 17.1085 -	 */
 17.1086 -	smp_store_cpu_info(0); /* Final full version of the data */
 17.1087 -	printk("CPU%d: ", 0);
 17.1088 -	print_cpu_info(&cpu_data[0]);
 17.1089 -
 17.1090 -#if 0
 17.1091 -	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 17.1092 -	boot_cpu_logical_apicid = logical_smp_processor_id();
 17.1093 -	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
 17.1094 -#else
 17.1095 -	// boot_cpu_physical_apicid = 0;
 17.1096 -	// boot_cpu_logical_apicid = 0;
 17.1097 -	x86_cpu_to_apicid[0] = 0;
 17.1098 -#endif
 17.1099 -
 17.1100 -	current_thread_info()->cpu = 0;
 17.1101 -	smp_tune_scheduling();
 17.1102 -	cpus_clear(cpu_sibling_map[0]);
 17.1103 -	cpu_set(0, cpu_sibling_map[0]);
 17.1104 -
 17.1105 -	/*
 17.1106 -	 * If we couldn't find an SMP configuration at boot time,
 17.1107 -	 * get out of here now!
 17.1108 -	 */
 17.1109 -	if (!smp_found_config /* && !acpi_lapic) */) {
 17.1110 -		printk(KERN_NOTICE "SMP motherboard not detected.\n");
 17.1111 -		smpboot_clear_io_apic_irqs();
 17.1112 -#if 0
 17.1113 -		phys_cpu_present_map = physid_mask_of_physid(0);
 17.1114 -		if (APIC_init_uniprocessor())
 17.1115 -			printk(KERN_NOTICE "Local APIC not detected."
 17.1116 -					   " Using dummy APIC emulation.\n");
 17.1117 -#endif
 17.1118 -		map_cpu_to_logical_apicid();
 17.1119 -		return;
 17.1120 -	}
 17.1121 -
 17.1122 -#if 0
 17.1123 -	/*
 17.1124 -	 * Should not be necessary because the MP table should list the boot
 17.1125 -	 * CPU too, but we do it for the sake of robustness anyway.
 17.1126 -	 * Makes no sense to do this check in clustered apic mode, so skip it
 17.1127 -	 */
 17.1128 -	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
 17.1129 -		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
 17.1130 -				boot_cpu_physical_apicid);
 17.1131 -		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 17.1132 -	}
 17.1133 -
 17.1134 -	/*
 17.1135 -	 * If we couldn't find a local APIC, then get out of here now!
 17.1136 -	 */
 17.1137 -	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
 17.1138 -		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
 17.1139 -			boot_cpu_physical_apicid);
 17.1140 -		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
 17.1141 -		smpboot_clear_io_apic_irqs();
 17.1142 -		phys_cpu_present_map = physid_mask_of_physid(0);
 17.1143 -		return;
 17.1144 -	}
 17.1145 -
 17.1146 -	verify_local_APIC();
 17.1147 -#endif
 17.1148 -
 17.1149 -	/*
 17.1150 -	 * If SMP should be disabled, then really disable it!
 17.1151 -	 */
 17.1152 -	if (!max_cpus) {
 17.1153 -		HYPERVISOR_shared_info->n_vcpu = 1;
 17.1154 -		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
 17.1155 -		smpboot_clear_io_apic_irqs();
 17.1156 -#if 0
 17.1157 -		phys_cpu_present_map = physid_mask_of_physid(0);
 17.1158 -#endif
 17.1159 -		return;
 17.1160 -	}
 17.1161 -
 17.1162 -	smp_intr_init();
 17.1163 -
 17.1164 -#if 0
 17.1165 -	connect_bsp_APIC();
 17.1166 -	setup_local_APIC();
 17.1167 -#endif
 17.1168 -	map_cpu_to_logical_apicid();
 17.1169 -#if 0
 17.1170 -
 17.1171 -
 17.1172 -	setup_portio_remap();
 17.1173 -
 17.1174 -	/*
 17.1175 -	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
 17.1176 -	 *
  17.1177 -	 * In clustered apic mode, phys_cpu_present_map is constructed thus:
 17.1178 -	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
 17.1179 -	 * clustered apic ID.
 17.1180 -	 */
 17.1181 -	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
 17.1182 -#endif
 17.1183 -	Dprintk("CPU present map: %lx\n",
 17.1184 -		(1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
 17.1185 -
 17.1186 -	kicked = 1;
 17.1187 -	for (cpu = 1; kicked < NR_CPUS &&
 17.1188 -		     cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
 17.1189 -		if (max_cpus <= cpucount+1)
 17.1190 -			continue;
 17.1191 -
 17.1192 -		if (do_boot_cpu(cpu))
 17.1193 -			printk("CPU #%d not responding - cannot use it.\n",
 17.1194 -								cpu);
 17.1195 -		else
 17.1196 -			++kicked;
 17.1197 -	}
 17.1198 -
 17.1199 -#if 0
 17.1200 -	/*
 17.1201 -	 * Cleanup possible dangling ends...
 17.1202 -	 */
 17.1203 -	smpboot_restore_warm_reset_vector();
 17.1204 -#endif
 17.1205 -
 17.1206 -	/*
 17.1207 -	 * Allow the user to impress friends.
 17.1208 -	 */
 17.1209 -	Dprintk("Before bogomips.\n");
 17.1210 -	for (cpu = 0; cpu < NR_CPUS; cpu++)
 17.1211 -		if (cpu_isset(cpu, cpu_callout_map))
 17.1212 -			bogosum += cpu_data[cpu].loops_per_jiffy;
 17.1213 -	printk(KERN_INFO
 17.1214 -		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 17.1215 -		cpucount+1,
 17.1216 -		bogosum/(500000/HZ),
 17.1217 -		(bogosum/(5000/HZ))%100);
 17.1218 -	
 17.1219 -	Dprintk("Before bogocount - setting activated=1.\n");
 17.1220 -
 17.1221 -	if (smp_b_stepping)
 17.1222 -		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
 17.1223 -
 17.1224 -	/*
  17.1225 -	 * Don't taint if we are running an SMP kernel on a single non-MP
  17.1226 -	 * approved Athlon.
 17.1227 -	 */
 17.1228 -	if (tainted & TAINT_UNSAFE_SMP) {
 17.1229 -		if (cpucount)
 17.1230 -			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
 17.1231 -		else
 17.1232 -			tainted &= ~TAINT_UNSAFE_SMP;
 17.1233 -	}
 17.1234 -
 17.1235 -	Dprintk("Boot done.\n");
 17.1236 -
 17.1237 -	/*
 17.1238 -	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
 17.1239 -	 * efficiently.
 17.1240 -	 */
 17.1241 -	for (cpu = 0; cpu < NR_CPUS; cpu++)
 17.1242 -		cpus_clear(cpu_sibling_map[cpu]);
 17.1243 -
 17.1244 -	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 17.1245 -		int siblings = 0;
 17.1246 -		int i;
 17.1247 -		if (!cpu_isset(cpu, cpu_callout_map))
 17.1248 -			continue;
 17.1249 -
 17.1250 -		if (smp_num_siblings > 1) {
 17.1251 -			for (i = 0; i < NR_CPUS; i++) {
 17.1252 -				if (!cpu_isset(i, cpu_callout_map))
 17.1253 -					continue;
 17.1254 -				if (phys_proc_id[cpu] == phys_proc_id[i]) {
 17.1255 -					siblings++;
 17.1256 -					cpu_set(i, cpu_sibling_map[cpu]);
 17.1257 -				}
 17.1258 -			}
 17.1259 -		} else {
 17.1260 -			siblings++;
 17.1261 -			cpu_set(cpu, cpu_sibling_map[cpu]);
 17.1262 -		}
 17.1263 -
 17.1264 -		if (siblings != smp_num_siblings)
 17.1265 -			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
 17.1266 -	}
 17.1267 -
 17.1268 -#if 0
 17.1269 -	if (nmi_watchdog == NMI_LOCAL_APIC)
 17.1270 -		check_nmi_watchdog();
 17.1271 -
 17.1272 -	smpboot_setup_io_apic();
 17.1273 -
 17.1274 -	setup_boot_APIC_clock();
 17.1275 -
 17.1276 -	/*
 17.1277 -	 * Synchronize the TSC with the AP
 17.1278 -	 */
 17.1279 -	if (cpu_has_tsc && cpucount && cpu_khz)
 17.1280 -		synchronize_tsc_bp();
 17.1281 -#endif
 17.1282 -}
 17.1283 -
 17.1284 -/* These are wrappers to interface to the new boot process.  Someone
 17.1285 -   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 17.1286 -void __init smp_prepare_cpus(unsigned int max_cpus)
 17.1287 -{
 17.1288 -	smp_boot_cpus(max_cpus);
 17.1289 -}
 17.1290 -
 17.1291 -void __devinit smp_prepare_boot_cpu(void)
 17.1292 -{
 17.1293 -	cpu_set(smp_processor_id(), cpu_online_map);
 17.1294 -	cpu_set(smp_processor_id(), cpu_callout_map);
 17.1295 -}
 17.1296 -
 17.1297 -int __devinit __cpu_up(unsigned int cpu)
 17.1298 -{
 17.1299 -	/* This only works at boot for x86.  See "rewrite" above. */
 17.1300 -	if (cpu_isset(cpu, smp_commenced_mask)) {
 17.1301 -		local_irq_enable();
 17.1302 -		return -ENOSYS;
 17.1303 -	}
 17.1304 -
 17.1305 -	/* In case one didn't come up */
 17.1306 -	if (!cpu_isset(cpu, cpu_callin_map)) {
 17.1307 -		local_irq_enable();
 17.1308 -		return -EIO;
 17.1309 -	}
 17.1310 -
 17.1311 -	local_irq_enable();
 17.1312 -	/* Unleash the CPU! */
 17.1313 -	cpu_set(cpu, smp_commenced_mask);
 17.1314 -	while (!cpu_isset(cpu, cpu_online_map))
 17.1315 -		mb();
 17.1316 -	return 0;
 17.1317 -}
 17.1318 -
 17.1319 -void __init smp_cpus_done(unsigned int max_cpus)
 17.1320 -{
 17.1321 -#if 1
 17.1322 -#else
 17.1323 -#ifdef CONFIG_X86_IO_APIC
 17.1324 -	setup_ioapic_dest();
 17.1325 -#endif
 17.1326 -	zap_low_mappings();
 17.1327 -	/*
 17.1328 -	 * Disable executability of the SMP trampoline:
 17.1329 -	 */
 17.1330 -	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
 17.1331 -#endif
 17.1332 -}
 17.1333 -
 17.1334 -extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
 17.1335 -
 17.1336 -static struct irqaction reschedule_irq = {
 17.1337 -	smp_reschedule_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "reschedule",
 17.1338 -	NULL, NULL
 17.1339 -};
 17.1340 -
 17.1341 -extern irqreturn_t smp_invalidate_interrupt(int, void *, struct pt_regs *);
 17.1342 -
 17.1343 -static struct irqaction invalidate_irq = {
 17.1344 -	smp_invalidate_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "invalidate",
 17.1345 -	NULL, NULL
 17.1346 -};
 17.1347 -
 17.1348 -extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
 17.1349 -
 17.1350 -static struct irqaction call_function_irq = {
 17.1351 -	smp_call_function_interrupt, SA_INTERRUPT, CPU_MASK_NONE,
 17.1352 -	"call_function", NULL, NULL
 17.1353 -};
 17.1354 -
 17.1355 -void __init smp_intr_init(void)
 17.1356 -{
 17.1357 -
 17.1358 -	(void)setup_irq(
 17.1359 -	    bind_ipi_on_cpu_to_irq(smp_processor_id(), RESCHEDULE_VECTOR),
 17.1360 -	    &reschedule_irq);
 17.1361 -	(void)setup_irq(
 17.1362 -	    bind_ipi_on_cpu_to_irq(smp_processor_id(), INVALIDATE_TLB_VECTOR),
 17.1363 -	    &invalidate_irq);
 17.1364 -	(void)setup_irq(
 17.1365 -	    bind_ipi_on_cpu_to_irq(smp_processor_id(), CALL_FUNCTION_VECTOR),
 17.1366 -	    &call_function_irq);
 17.1367 -}
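
Under Xen there is no local APIC to program; each IPI "vector" is instead bound on the current CPU and delivered as an ordinary Linux interrupt. Adding a new IPI would follow the same pattern as above (the vector name and handler below are hypothetical, for illustration only):

    static irqreturn_t smp_example_interrupt(int irq, void *dev_id,
                                             struct pt_regs *regs)
    {
        /* Work for the new IPI would go here. */
        return IRQ_HANDLED;
    }

    static struct irqaction example_irq = {
        smp_example_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "example",
        NULL, NULL
    };

    void smp_example_intr_init(void)
    {
        (void)setup_irq(
            bind_ipi_on_cpu_to_irq(smp_processor_id(), EXAMPLE_VECTOR),
            &example_irq);
    }
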
    18.1 --- a/linux-2.6.9-xen-sparse/arch/xen/kernel/smp.c	Mon Dec 27 10:12:02 2004 +0000
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,19 +0,0 @@
    18.4 -/* Copyright (C) 2004, Christian Limpach */
    18.5 -
    18.6 -#include <linux/init.h>
    18.7 -#include <linux/kernel.h>
    18.8 -#include <linux/threads.h>
    18.9 -
   18.10 -unsigned int __initdata maxcpus = NR_CPUS;
   18.11 -
   18.12 -
   18.13 -/*
    18.14 - * The frequency of the profiling timer can be changed
   18.15 - * by writing a multiplier value into /proc/profile.
   18.16 - */
   18.17 -int setup_profiling_timer(unsigned int multiplier)
   18.18 -{
   18.19 -	printk("setup_profiling_timer\n");
   18.20 -
   18.21 -	return 0;
   18.22 -}
    19.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile	Mon Dec 27 10:12:02 2004 +0000
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,3 +0,0 @@
    19.4 -
    19.5 -obj-y	:= blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o 
    19.6 -
    20.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c	Mon Dec 27 10:12:02 2004 +0000
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,86 +0,0 @@
    20.4 -/******************************************************************************
    20.5 - * blktap.c
    20.6 - * 
    20.7 - * XenLinux virtual block-device tap.
    20.8 - * 
    20.9 - * Copyright (c) 2004, Andrew Warfield
   20.10 - *
   20.11 - * Based on the original split block driver:
   20.12 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   20.13 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   20.14 - * Copyright (c) 2004, Christian Limpach
   20.15 - * 
   20.16 - * Note that unlike the split block driver code, this driver has been developed
   20.17 - * strictly for Linux 2.6
   20.18 - */
   20.19 -
   20.20 -#include "blktap.h"
   20.21 -
   20.22 -int __init xlblk_init(void)
   20.23 -{
   20.24 -    ctrl_msg_t               cmsg;
   20.25 -    blkif_fe_driver_status_t fe_st;
   20.26 -    blkif_be_driver_status_t be_st;
   20.27 -
   20.28 -    printk(KERN_INFO "Initialising Xen block tap device\n");
   20.29 -
   20.30 -    DPRINTK("   tap - Backend connection init:\n");
   20.31 -
   20.32 -
   20.33 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
   20.34 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
   20.35 -
   20.36 -    /* Send a driver-UP notification to the domain controller. */
   20.37 -    cmsg.type      = CMSG_BLKIF_FE;
   20.38 -    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
   20.39 -    cmsg.length    = sizeof(blkif_fe_driver_status_t);
   20.40 -    fe_st.status   = BLKIF_DRIVER_STATUS_UP;
   20.41 -    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
   20.42 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   20.43 -
   20.44 -    DPRINTK("   tap - Frontend connection init:\n");
   20.45 -    
   20.46 -    active_reqs_init();
   20.47 -    
   20.48 -    ptfe_blkif.status = DISCONNECTED;
   20.49 -
   20.50 -    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, 
   20.51 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
   20.52 -
   20.53 -    /* Send a driver-UP notification to the domain controller. */
   20.54 -    cmsg.type      = CMSG_BLKIF_BE;
   20.55 -    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS;
   20.56 -    cmsg.length    = sizeof(blkif_be_driver_status_t);
   20.57 -    be_st.status   = BLKIF_DRIVER_STATUS_UP;
   20.58 -    memcpy(cmsg.msg, &be_st, sizeof(be_st));
   20.59 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   20.60 -
   20.61 -    DPRINTK("   tap - Userland channel init:\n");
   20.62 -
   20.63 -    blktap_init();
   20.64 -
   20.65 -    DPRINTK("Blkif tap device initialized.\n");
   20.66 -
   20.67 -    return 0;
   20.68 -}
   20.69 -
   20.70 -void blkdev_suspend(void)
   20.71 -{
   20.72 -}
   20.73 -
   20.74 -void blkdev_resume(void)
   20.75 -{
   20.76 -    ctrl_msg_t               cmsg;
   20.77 -    blkif_fe_driver_status_t st;    
   20.78 -
   20.79 -    /* Send a driver-UP notification to the domain controller. */
   20.80 -    cmsg.type      = CMSG_BLKIF_FE;
   20.81 -    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS;
   20.82 -    cmsg.length    = sizeof(blkif_fe_driver_status_t);
   20.83 -    st.status      = BLKIF_DRIVER_STATUS_UP;
   20.84 -    memcpy(cmsg.msg, &st, sizeof(st));
   20.85 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   20.86 -}
   20.87 -
   20.88 -
   20.89 -__initcall(xlblk_init);
    21.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h	Mon Dec 27 10:12:02 2004 +0000
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,254 +0,0 @@
    21.4 -/*
    21.5 - * blktap.h
    21.6 - * 
    21.7 - * Interfaces for the Xen block tap driver.
    21.8 - * 
    21.9 - * (c) 2004, Andrew Warfield, University of Cambridge
   21.10 - * 
   21.11 - */
   21.12 -
   21.13 -#ifndef __BLKTAP_H__
   21.14 -#define __BLKTAP_H__
   21.15 -
   21.16 -#include <linux/version.h>
   21.17 -#include <linux/blkdev.h>
   21.18 -#include <linux/config.h>
   21.19 -#include <linux/sched.h>
   21.20 -#include <linux/interrupt.h>
   21.21 -#include <asm-xen/ctrl_if.h>
   21.22 -#include <linux/slab.h>
   21.23 -#include <linux/blkdev.h>
   21.24 -#include <asm/io.h>
   21.25 -#include <asm/setup.h>
   21.26 -#include <asm/pgalloc.h>
   21.27 -#include <asm-xen/hypervisor.h>
   21.28 -#include <asm-xen/xen-public/io/blkif.h>
   21.29 -
   21.30 -/* -------[ debug / pretty printing ]--------------------------------- */
   21.31 -
   21.32 -#if 0
   21.33 -#define ASSERT(_p) \
   21.34 -    if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
   21.35 -    __LINE__, __FILE__); *(int*)0=0; }
   21.36 -#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
   21.37 -                           __FILE__ , __LINE__ , ## _a )
   21.38 -#else
   21.39 -#define ASSERT(_p) ((void)0)
   21.40 -#define DPRINTK(_f, _a...) ((void)0)
   21.41 -#endif
   21.42 -
   21.43 -#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
   21.44 -
   21.45 -/* -------[ connection / request tracking ]--------------------------- */
   21.46 -
   21.47 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   21.48 -#define VMALLOC_VMADDR(x) ((unsigned long)(x))
   21.49 -#endif
   21.50 -
   21.51 -extern spinlock_t blkif_io_lock;
   21.52 -
   21.53 -typedef struct blkif_st {
   21.54 -    /* Unique identifier for this interface. */
   21.55 -    domid_t          domid;
   21.56 -    unsigned int     handle;
   21.57 -    /* Physical parameters of the comms window. */
   21.58 -    unsigned long    shmem_frame;
   21.59 -    unsigned int     evtchn;
   21.60 -    int              irq;
   21.61 -    /* Comms information. */
   21.62 -    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
   21.63 -    BLKIF_RING_IDX     blk_req_cons;  /* Request consumer. */
   21.64 -    BLKIF_RING_IDX     blk_resp_prod; /* Private version of resp. producer. */
   21.65 -    
   21.66 -    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
   21.67 -    /*
   21.68 -     * DISCONNECT response is deferred until pending requests are ack'ed.
   21.69 -     * We therefore need to store the id from the original request.
   21.70 -     */
   21.70 -    u8               disconnect_rspid;
   21.71 -    struct blkif_st *hash_next;
   21.72 -    struct list_head blkdev_list;
   21.73 -    spinlock_t       blk_ring_lock;
   21.74 -    atomic_t         refcnt;
   21.75 -    
   21.76 -    struct work_struct work;
   21.77 -} blkif_t;
   21.78 -
   21.79 -typedef struct {
   21.80 -    blkif_t       *blkif;
   21.81 -    unsigned long  id;
   21.82 -    int            nr_pages;
   21.83 -    unsigned long  mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   21.84 -    unsigned long  virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   21.85 -    int            next_free;
   21.86 -} active_req_t;
   21.87 -
   21.88 -
   21.89 -/* -------[ block ring structs ]-------------------------------------- */
   21.90 -
   21.91 -/* Types of ring. */
   21.92 -#define BLKIF_REQ_RING_TYPE 1
   21.93 -#define BLKIF_RSP_RING_TYPE 2
   21.94 -
   21.95 -/* generic ring struct. */
   21.96 -typedef struct blkif_generic_ring_struct {
   21.97 -    int type;
   21.98 -} blkif_generic_ring_t;
   21.99 -
  21.100 -/* A requestor's view of a ring. */
  21.101 -typedef struct blkif_req_ring_struct {
  21.102 -
  21.103 -    int type;                    /* Will be BLKIF_REQ_RING_TYPE        */
  21.104 -    BLKIF_RING_IDX req_prod;     /* PRIVATE req_prod index             */
  21.105 -    BLKIF_RING_IDX rsp_cons;     /* Response consumer index            */
  21.106 -    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
  21.107 -
  21.108 -} blkif_req_ring_t;
  21.109 -
  21.110 -#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
  21.111 -
  21.112 -/* A responder's view of a ring. */
  21.113 -typedef struct blkif_rsp_ring_struct {
  21.114 -
  21.115 -    int type;       
  21.116 -    BLKIF_RING_IDX rsp_prod;     /* PRIVATE rsp_prod index             */
  21.117 -    BLKIF_RING_IDX req_cons;     /* Request consumer index             */
  21.118 -    blkif_ring_t *ring;          /* Pointer to shared ring struct      */
  21.119 -
  21.120 -} blkif_rsp_ring_t;
  21.121 -
   21.122 -#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
  21.123 -
  21.124 -#define RING(a) (blkif_generic_ring_t *)(a)
  21.125 -
  21.126 -inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
  21.127 -
  21.128 -
  21.129 -/* -------[ interposition -> character device interface ]------------- */
  21.130 -
   21.131 -/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
  21.132 -#define BLKTAP_MINOR 202
  21.133 -
  21.134 -/* size of the extra VMA area to map in attached pages. */
  21.135 -#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
  21.136 -
  21.137 -/* blktap IOCTLs:                                                      */
  21.138 -#define BLKTAP_IOCTL_KICK_FE         1
  21.139 -#define BLKTAP_IOCTL_KICK_BE         2
  21.140 -#define BLKTAP_IOCTL_SETMODE         3
  21.141 -
  21.142 -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
  21.143 -#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
  21.144 -#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
  21.145 -#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
  21.146 -#define BLKTAP_MODE_COPY_FE          0x00000004
  21.147 -#define BLKTAP_MODE_COPY_BE          0x00000008
  21.148 -#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
  21.149 -#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
  21.150 -
  21.151 -#define BLKTAP_MODE_INTERPOSE \
  21.152 -           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
  21.153 -
  21.154 -#define BLKTAP_MODE_COPY_BOTH \
  21.155 -           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
  21.156 -
  21.157 -#define BLKTAP_MODE_COPY_BOTH_PAGES \
  21.158 -           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
  21.159 -
  21.160 -static inline int BLKTAP_MODE_VALID(unsigned long arg)
  21.161 -{
  21.162 -    return (
  21.163 -        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
  21.164 -        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
  21.165 -        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
  21.166 -        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
  21.167 -        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
  21.168 -        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
  21.169 -        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
  21.170 -        );
  21.171 -}
  21.172 -
  21.173 -
  21.174 -
  21.175 -/* -------[ Mappings to User VMA ]------------------------------------ */
  21.176 -#define MAX_PENDING_REQS 64
  21.177 -#define BATCH_PER_DOMAIN 16
  21.178 -extern struct vm_area_struct *blktap_vma;
  21.179 -
  21.180 -/* The following are from blkback.c and should probably be put in a
  21.181 - * header and included from there.
   21.182 - * The mmap area described here is where attached data pages will be mapped.
  21.183 - */
  21.184 - 
  21.185 -extern unsigned long mmap_vstart;
  21.186 -#define MMAP_PAGES_PER_REQUEST \
  21.187 -    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
  21.188 -#define MMAP_PAGES             \
  21.189 -    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
  21.190 -#define MMAP_VADDR(_req,_seg)                        \
  21.191 -    (mmap_vstart +                                   \
  21.192 -     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
  21.193 -     ((_seg) * PAGE_SIZE))
  21.194 -
  21.195 -/* immediately before the mmap area, we have a bunch of pages reserved
  21.196 - * for shared memory rings.
  21.197 - */
  21.198 -
  21.199 -#define RING_PAGES 128 
  21.200 -extern unsigned long rings_vstart;
  21.201 -
  21.202 -/* -------[ Here be globals ]----------------------------------------- */
  21.203 -
  21.204 -extern unsigned long blktap_mode;
  21.205 -
  21.206 -
  21.207 -/* blkif struct, containing ring to FE domain */
  21.208 -extern blkif_t ptfe_blkif; 
  21.209 -
  21.210 -/* Connection to a single backend domain. */
  21.211 -extern blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
  21.212 -extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
  21.213 -extern BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
  21.214 -
  21.215 -/* Rings up to user space. */ 
  21.216 -extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
  21.217 -extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
  21.218 -
  21.219 -/* Event channel to backend domain. */
  21.220 -extern unsigned int blkif_ptbe_evtchn;
  21.221 -
  21.222 -/* User ring status... this will soon vanish into a ring struct. */
  21.223 -extern unsigned long blktap_ring_ok;
  21.224 -
  21.225 -/* -------[ ...and function prototypes. ]----------------------------- */
  21.226 -
  21.227 -/* init function for character device interface.                       */
  21.228 -int blktap_init(void);
  21.229 -
  21.230 -/* interfaces to the char driver, passing messages to and from apps.   */
  21.231 -void blktap_kick_user(void);
  21.232 -int blktap_write_to_ring(blkif_request_t *req);
  21.233 -
  21.234 -
  21.235 -/* user ring access functions: */
  21.236 -int blktap_write_fe_ring(blkif_request_t *req);
  21.237 -int blktap_write_be_ring(blkif_response_t *rsp);
  21.238 -int blktap_read_fe_ring(void);
  21.239 -int blktap_read_be_ring(void);
  21.240 -
  21.241 -/* and the helpers they call: */
  21.242 -inline int write_resp_to_fe_ring(blkif_response_t *rsp);
  21.243 -inline void kick_fe_domain(void);
  21.244 -
  21.245 -inline int write_req_to_be_ring(blkif_request_t *req);
  21.246 -inline void kick_be_domain(void);
  21.247 -
  21.248 -/* Interrupt handlers. */
  21.249 -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
  21.250 -                                  struct pt_regs *ptregs);
  21.251 -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
  21.252 -
  21.253 -/* Control message receiver. */
  21.254 -extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
  21.255 -
   21.257 -#endif
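
For orientation, a userspace sketch of attaching to the tap through the interface declared above. The ioctl number, mode bits, and the exact-size mmap requirement come from this header and from blktap_mmap() in blktap_userdev.c below; the device path and error handling are assumptions:

    /* Userspace sketch: open the tap, switch to interpose mode, map rings. */
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>

    #define BLKTAP_IOCTL_SETMODE  3
    #define BLKTAP_MODE_INTERPOSE 0x00000003   /* INTERCEPT_FE | INTERCEPT_BE */

    /* total_pages must be exactly RING_PAGES + MMAP_PAGES (see blktap_mmap). */
    int attach_blktap(unsigned long page_size, unsigned long total_pages,
                      void **rings)
    {
        int fd = open("/dev/misc/blktap", O_RDWR);   /* assumed device node */
        if (fd < 0)
            return -1;
        if (ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE) < 0)
            return -1;
        *rings = mmap(NULL, total_pages * page_size,
                      PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        return (*rings == MAP_FAILED) ? -1 : fd;
    }
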
    22.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Mon Dec 27 10:12:02 2004 +0000
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,358 +0,0 @@
    22.4 -/******************************************************************************
    22.5 - * blktap_controlmsg.c
    22.6 - * 
    22.7 - * XenLinux virtual block-device tap.
    22.8 - * Control interfaces to the frontend and backend drivers.
    22.9 - * 
   22.10 - * Copyright (c) 2004, Andrew Warfield
   22.11 - *
   22.12 - */
   22.13 - 
   22.14 -#include "blktap.h"
   22.15 -
   22.16 -#define BLKIF_STATE_CLOSED       0
   22.17 -#define BLKIF_STATE_DISCONNECTED 1
   22.18 -#define BLKIF_STATE_CONNECTED    2
   22.19 -
   22.20 -static char *blkif_state_name[] = {
   22.21 -    [BLKIF_STATE_CLOSED]       = "closed",
   22.22 -    [BLKIF_STATE_DISCONNECTED] = "disconnected",
   22.23 -    [BLKIF_STATE_CONNECTED]    = "connected",
   22.24 -};
   22.25 -
   22.26 -static char * blkif_status_name[] = {
   22.27 -    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
   22.28 -    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
   22.29 -    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
   22.30 -    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
   22.31 -};
   22.32 -static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED;
   22.33 -static unsigned blkif_ptbe_irq;
   22.34 -unsigned int blkif_ptbe_evtchn;
   22.35 -
   22.36 -/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
   22.37 -
   22.38 -
   22.39 -void blkif_ptfe_create(blkif_be_create_t *create)
   22.40 -{
   22.41 -    blkif_t      *blkif;
   22.42 -    domid_t       domid  = create->domid;
   22.43 -    unsigned int  handle = create->blkif_handle;
   22.44 -
   22.45 -
   22.46 -    /* May want to store info on the connecting domain here. */
   22.47 -
   22.48 -    DPRINTK("PT got BE_CREATE\n");
    22.49 -    blkif = &ptfe_blkif; /* for convenience if the hash is re-added later. */
   22.50 -
   22.51 -    /* blkif struct init code from blkback.c */
   22.52 -    memset(blkif, 0, sizeof(*blkif));
   22.53 -    blkif->domid  = domid;
   22.54 -    blkif->handle = handle;
   22.55 -    blkif->status = DISCONNECTED;    
   22.56 -    spin_lock_init(&blkif->blk_ring_lock);
   22.57 -    atomic_set(&blkif->refcnt, 0);
   22.58 -
   22.59 -    create->status = BLKIF_BE_STATUS_OKAY;
   22.60 -}
   22.61 -
   22.62 -
   22.63 -void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
   22.64 -{
   22.65 -    /* Clear anything that we initialized above. */
   22.66 -
   22.67 -    DPRINTK("PT got BE_DESTROY\n");
   22.68 -    destroy->status = BLKIF_BE_STATUS_OKAY;
   22.69 -}
   22.70 -
   22.71 -void blkif_ptfe_connect(blkif_be_connect_t *connect)
   22.72 -{
   22.73 -    domid_t       domid  = connect->domid;
   22.74 -    /*unsigned int  handle = connect->blkif_handle;*/
   22.75 -    unsigned int  evtchn = connect->evtchn;
   22.76 -    unsigned long shmem_frame = connect->shmem_frame;
   22.77 -    struct vm_struct *vma;
   22.78 -    pgprot_t      prot;
   22.79 -    int           error;
   22.80 -    blkif_t      *blkif;
   22.81 -
   22.82 -    DPRINTK("PT got BE_CONNECT\n");
   22.83 -
    22.84 -    blkif = &ptfe_blkif; /* for convenience if the hash is re-added later. */
   22.85 -
   22.86 -    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
   22.87 -    {
   22.88 -        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   22.89 -        return;
   22.90 -    }
   22.91 -
   22.92 -    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
   22.93 -    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
   22.94 -                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
   22.95 -                                    prot, domid);
   22.96 -    if ( error != 0 )
   22.97 -    {
   22.98 -        WPRINTK("BE_CONNECT: error! (%d)\n", error);
   22.99 -        if ( error == -ENOMEM ) 
  22.100 -            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  22.101 -        else if ( error == -EFAULT ) {
  22.102 -            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
  22.103 -            WPRINTK("BE_CONNECT: MAPPING error!\n");
  22.104 -        }
  22.105 -        else
  22.106 -            connect->status = BLKIF_BE_STATUS_ERROR;
  22.107 -        vfree(vma->addr);
  22.108 -        return;
  22.109 -    }
  22.110 -
  22.111 -    if ( blkif->status != DISCONNECTED )
  22.112 -    {
  22.113 -        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  22.114 -        vfree(vma->addr);
  22.115 -        return;
  22.116 -    }
  22.117 -
  22.118 -    blkif->evtchn        = evtchn;
  22.119 -    blkif->irq           = bind_evtchn_to_irq(evtchn);
  22.120 -    blkif->shmem_frame   = shmem_frame;
  22.121 -    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
  22.122 -    blkif->status        = CONNECTED;
  22.123 -    /*blkif_get(blkif);*/
  22.124 -
  22.125 -    request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif);
  22.126 -
  22.127 -    connect->status = BLKIF_BE_STATUS_OKAY;
  22.128 -}
  22.129 -
  22.130 -void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect)
  22.131 -{
  22.132 -    /*
  22.133 -     * don't actually set the passthrough to disconnected.
  22.134 -     * We just act as a pipe, and defer to the real ends to handle things like
  22.135 -     * recovery.
  22.136 -     */
  22.137 -
  22.138 -    DPRINTK("PT got BE_DISCONNECT\n");
  22.139 -
  22.140 -    disconnect->status = BLKIF_BE_STATUS_OKAY;
  22.141 -    return;
  22.142 -}
  22.143 -
  22.144 -/*-----[ Control Messages to/from Backend VM ]----------------------------*/
  22.145 -
  22.146 -/* Tell the controller to bring up the interface. */
  22.147 -static void blkif_ptbe_send_interface_connect(void)
  22.148 -{
  22.149 -    ctrl_msg_t cmsg = {
  22.150 -        .type    = CMSG_BLKIF_FE,
  22.151 -        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
  22.152 -        .length  = sizeof(blkif_fe_interface_connect_t),
  22.153 -    };
  22.154 -    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
  22.155 -    msg->handle      = 0;
  22.156 -    msg->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT;
  22.157 -    
  22.158 -    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  22.159 -}
  22.160 -
  22.161 -static void blkif_ptbe_close(void)
  22.162 -{
  22.163 -}
  22.164 -
  22.165 -/* Move from CLOSED to DISCONNECTED state. */
  22.166 -static void blkif_ptbe_disconnect(void)
  22.167 -{
  22.168 -    blk_ptbe_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  22.169 -    blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod 
  22.170 -                            = ptbe_resp_cons = ptbe_req_prod = 0;
  22.171 -    blkif_pt_state  = BLKIF_STATE_DISCONNECTED;
  22.172 -    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
  22.173 -    blkif_ptbe_send_interface_connect();
  22.174 -}
  22.175 -
  22.176 -static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
  22.177 -{
  22.178 -    int err = 0;
  22.179 -    
  22.180 -    blkif_ptbe_evtchn = status->evtchn;
  22.181 -    blkif_ptbe_irq    = bind_evtchn_to_irq(blkif_ptbe_evtchn);
  22.182 -
  22.183 -    err = request_irq(blkif_ptbe_irq, blkif_ptbe_int, 
  22.184 -                      SA_SAMPLE_RANDOM, "blkif", NULL);
  22.185 -    if ( err ) {
  22.186 -	WPRINTK("blkfront request_irq failed (%d)\n", err);
  22.187 -        return;
  22.188 -    } else {
   22.189 -	/* transition to connected in case we need to do a
   22.190 -	   partition probe on a whole disk */
  22.191 -        blkif_pt_state = BLKIF_STATE_CONNECTED;
  22.192 -    }
  22.193 -}
  22.194 -
  22.195 -static void unexpected(blkif_fe_interface_status_t *status)
  22.196 -{
  22.197 -    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n", 
  22.198 -           blkif_status_name[status->status],
  22.199 -           blkif_state_name[blkif_pt_state]);
  22.200 -}
  22.201 -
  22.202 -static void blkif_ptbe_status(
  22.203 -    blkif_fe_interface_status_t *status)
  22.204 -{
  22.205 -    if ( status->handle != 0 )
  22.206 -    {
  22.207 -        DPRINTK("Status change on unsupported blkif %d\n",
  22.208 -               status->handle);
  22.209 -        return;
  22.210 -    }
  22.211 -
  22.212 -    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
  22.213 -    
  22.214 -    switch ( status->status )
  22.215 -    {
  22.216 -    case BLKIF_INTERFACE_STATUS_CLOSED:
  22.217 -        switch ( blkif_pt_state )
  22.218 -        {
  22.219 -        case BLKIF_STATE_CLOSED:
  22.220 -            unexpected(status);
  22.221 -            break;
  22.222 -        case BLKIF_STATE_DISCONNECTED:
  22.223 -        case BLKIF_STATE_CONNECTED:
  22.224 -            unexpected(status);
  22.225 -            blkif_ptbe_close();
  22.226 -            break;
  22.227 -        }
  22.228 -        break;
  22.229 -        
  22.230 -    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
  22.231 -        switch ( blkif_pt_state )
  22.232 -        {
  22.233 -        case BLKIF_STATE_CLOSED:
  22.234 -            blkif_ptbe_disconnect();
  22.235 -            break;
  22.236 -        case BLKIF_STATE_DISCONNECTED:
  22.237 -        case BLKIF_STATE_CONNECTED:
  22.238 -            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
  22.239 -            unexpected(status);
  22.240 -            break;
  22.241 -        }
  22.242 -        break;
  22.243 -        
  22.244 -    case BLKIF_INTERFACE_STATUS_CONNECTED:
  22.245 -        switch ( blkif_pt_state )
  22.246 -        {
  22.247 -        case BLKIF_STATE_CLOSED:
  22.248 -            unexpected(status);
  22.249 -            blkif_ptbe_disconnect();
  22.250 -            blkif_ptbe_connect(status);
  22.251 -            break;
  22.252 -        case BLKIF_STATE_DISCONNECTED:
  22.253 -            blkif_ptbe_connect(status);
  22.254 -            break;
  22.255 -        case BLKIF_STATE_CONNECTED:
  22.256 -            unexpected(status);
  22.257 -            blkif_ptbe_connect(status);
  22.258 -            break;
  22.259 -        }
  22.260 -        break;
  22.261 -
   22.262 -    case BLKIF_INTERFACE_STATUS_CHANGED:
  22.263 -        switch ( blkif_pt_state )
  22.264 -        {
  22.265 -        case BLKIF_STATE_CLOSED:
  22.266 -        case BLKIF_STATE_DISCONNECTED:
  22.267 -            unexpected(status);
  22.268 -            break;
  22.269 -        case BLKIF_STATE_CONNECTED:
  22.270 -            /* vbd_update(); */
  22.271 -            /* tap doesn't really get state changes... */
  22.272 -            unexpected(status);
  22.273 -            break;
  22.274 -        }
   22.275 -        break;
  22.276 -       
  22.277 -    default:
  22.278 -        DPRINTK("Status change to unknown value %d\n", status->status);
  22.279 -        break;
  22.280 -    }
  22.281 -}
  22.282 -
  22.283 -/*-----[ All control messages enter here: ]-------------------------------*/
  22.284 -
  22.285 -void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  22.286 -{
  22.287 -    switch ( msg->type )
  22.288 -    {
  22.289 -    case CMSG_BLKIF_FE:
  22.290 -
  22.291 -        switch ( msg->subtype )
  22.292 -        {
  22.293 -        case CMSG_BLKIF_FE_INTERFACE_STATUS:
  22.294 -            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
  22.295 -                goto parse_error;
  22.296 -            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
  22.297 -            break;        
  22.298 -
  22.299 -        default:
  22.300 -            goto parse_error;
  22.301 -        }
  22.302 -
  22.303 -    case CMSG_BLKIF_BE:
  22.304 -        
  22.305 -        switch ( msg->subtype )
  22.306 -        {
  22.307 -        case CMSG_BLKIF_BE_CREATE:
  22.308 -            if ( msg->length != sizeof(blkif_be_create_t) )
  22.309 -                goto parse_error;
  22.310 -            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
  22.311 -            break; 
  22.312 -        case CMSG_BLKIF_BE_DESTROY:
  22.313 -            if ( msg->length != sizeof(blkif_be_destroy_t) )
  22.314 -                goto parse_error;
  22.315 -            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
  22.316 -            break;        
  22.317 -        case CMSG_BLKIF_BE_CONNECT:
  22.318 -            if ( msg->length != sizeof(blkif_be_connect_t) )
  22.319 -                goto parse_error;
  22.320 -            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
  22.321 -            break;        
  22.322 -        case CMSG_BLKIF_BE_DISCONNECT:
  22.323 -            if ( msg->length != sizeof(blkif_be_disconnect_t) )
  22.324 -                goto parse_error;
  22.325 -            blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]);
  22.326 -            break;        
  22.327 -
  22.328 -        /* We just ignore anything to do with vbds for now. */
  22.329 -        
  22.330 -        case CMSG_BLKIF_BE_VBD_CREATE:
  22.331 -            DPRINTK("PT got VBD_CREATE\n");
  22.332 -            ((blkif_be_vbd_create_t *)&msg->msg[0])->status 
  22.333 -                = BLKIF_BE_STATUS_OKAY;
  22.334 -            break;
  22.335 -        case CMSG_BLKIF_BE_VBD_DESTROY:
  22.336 -            DPRINTK("PT got VBD_DESTROY\n");
  22.337 -            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
  22.338 -                = BLKIF_BE_STATUS_OKAY;
  22.339 -            break;
  22.340 -        case CMSG_BLKIF_BE_VBD_GROW:
  22.341 -            DPRINTK("PT got VBD_GROW\n");
  22.342 -            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
  22.343 -                = BLKIF_BE_STATUS_OKAY;
  22.344 -            break;
  22.345 -        case CMSG_BLKIF_BE_VBD_SHRINK:
  22.346 -            DPRINTK("PT got VBD_SHRINK\n");
  22.347 -            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
  22.348 -                = BLKIF_BE_STATUS_OKAY;
  22.349 -            break;
  22.350 -        default:
  22.351 -            goto parse_error;
  22.352 -        }
  22.353 -    }
  22.354 -
  22.355 -    ctrl_if_send_response(msg);
  22.356 -    return;
  22.357 -
  22.358 - parse_error:
  22.359 -    msg->length = 0;
  22.360 -    ctrl_if_send_response(msg);
  22.361 -}
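
blkif_ptfe_connect() above captures the pre-grant-table recipe for sharing a page across domains: reserve kernel virtual space with get_vm_area(), then point it at the foreign machine frame with direct_remap_area_pages(). The same sequence, condensed into a hypothetical wrapper:

    /* Hypothetical wrapper for the mapping step in blkif_ptfe_connect():
     * map one machine frame belonging to 'domid' and return its kernel
     * virtual address, or NULL on failure. */
    static void *map_foreign_frame(domid_t domid, unsigned long mfn)
    {
        struct vm_struct *vma;
        pgprot_t prot;

        if ((vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL)
            return NULL;
        prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
        if (direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
                                    mfn << PAGE_SHIFT, PAGE_SIZE,
                                    prot, domid) != 0) {
            vfree(vma->addr);   /* release the reserved VA range */
            return NULL;
        }
        return vma->addr;
    }
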
    23.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Mon Dec 27 10:12:02 2004 +0000
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,517 +0,0 @@
    23.4 -/******************************************************************************
    23.5 - * blktap_datapath.c
    23.6 - * 
    23.7 - * XenLinux virtual block-device tap.
    23.8 - * Block request routing data path.
    23.9 - * 
   23.10 - * Copyright (c) 2004, Andrew Warfield
   23.11 - *
   23.12 - */
   23.13 - 
   23.14 -#include "blktap.h"
   23.15 -
   23.16 -/*-----[ The data paths ]-------------------------------------------------*/
   23.17 - 
   23.18 -/* Connections to the frontend domains.*/
   23.19 -blkif_t   ptfe_blkif; 
   23.20 - 
   23.21 -/* Connection to a single backend domain. */
   23.22 -blkif_ring_t *blk_ptbe_ring;   /* Ring from the PT to the BE dom    */ 
   23.23 -BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
   23.24 -BLKIF_RING_IDX ptbe_req_prod;  /* Private request producer.         */
   23.25 -
   23.26 -/* Rings up to user space. */ 
   23.27 -blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
   23.28 -blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
   23.29 -
   23.30 -/*-----[ Ring helpers ]---------------------------------------------------*/
   23.31 -
   23.32 -inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring)
   23.33 -{
   23.34 -    if (ring->type == BLKIF_REQ_RING_TYPE) {
   23.35 -        blkif_req_ring_t *r = (blkif_req_ring_t *)ring;
   23.36 -        return ( ( r->req_prod - r->rsp_cons ) == BLKIF_RING_SIZE );
   23.37 -    }
   23.38 -    
   23.39 -    /* for now assume that there is always room in the response path. */
   23.40 -    return 0;
   23.41 -}
   23.42 -
   23.43 -/*-----[ Tracking active requests ]---------------------------------------*/
   23.44 -
   23.45 -/* this must be the same as MAX_PENDING_REQS in blkback.c */
   23.46 -#define MAX_ACTIVE_REQS 64
   23.47 -
   23.48 -active_req_t  active_reqs[MAX_ACTIVE_REQS];
   23.49 -unsigned char active_req_ring[MAX_ACTIVE_REQS];
   23.50 -spinlock_t    active_req_lock = SPIN_LOCK_UNLOCKED;
   23.51 -typedef unsigned int ACTIVE_RING_IDX;
   23.52 -ACTIVE_RING_IDX active_prod, active_cons;
   23.53 -#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
   23.54 -#define ACTIVE_IDX(_ar) (_ar - active_reqs)
   23.55 -
   23.56 -inline active_req_t *get_active_req(void) 
   23.57 -{
   23.58 -    ASSERT(active_cons != active_prod);    
    23.59 -    return &active_reqs[active_req_ring[MASK_ACTIVE_IDX(active_cons++)]];
   23.60 -}
   23.61 -
   23.62 -inline void free_active_req(active_req_t *ar) 
   23.63 -{
   23.64 -    unsigned long flags;
   23.65 -        
   23.66 -    spin_lock_irqsave(&active_req_lock, flags);
   23.67 -    active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar);
   23.68 -    spin_unlock_irqrestore(&active_req_lock, flags);
   23.69 -}
   23.70 -
   23.71 -inline void active_reqs_init(void)
   23.72 -{
   23.73 -    ACTIVE_RING_IDX i;
   23.74 -    
   23.75 -    active_cons = 0;
   23.76 -    active_prod = MAX_ACTIVE_REQS;
   23.77 -    memset(active_reqs, 0, sizeof(active_reqs));
   23.78 -    for ( i = 0; i < MAX_ACTIVE_REQS; i++ )
   23.79 -        active_req_ring[i] = i;
   23.80 -}
   23.81 -
   23.82 -/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
   23.83 -
   23.84 -irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
   23.85 -{
   23.86 -    /* we have pending messages from the real frontend. */
   23.87 -
   23.88 -    blkif_request_t *req_s, *req_d;
   23.89 -    BLKIF_RING_IDX fe_rp;
   23.90 -    unsigned long flags;
   23.91 -    int notify;
   23.92 -    unsigned long i;
   23.93 -    active_req_t *ar;
   23.94 -    
   23.95 -    DPRINTK("PT got FE interrupt.\n");
   23.96 -    
   23.97 -    /* lock both rings */
   23.98 -    spin_lock_irqsave(&blkif_io_lock, flags);
   23.99 -
  23.100 -    /* While there are REQUESTS on FERing: */
  23.101 -    fe_rp = ptfe_blkif.blk_ring_base->req_prod;
  23.102 -    rmb();
  23.103 -    notify = (ptfe_blkif.blk_req_cons != fe_rp);
  23.104 -
  23.105 -    for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) {
  23.106 -
  23.107 -        /* Get the next request */
  23.108 -        req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req;
  23.109 -        
  23.110 -        /* This is a new request:  
  23.111 -         * Assign an active request record, and remap the id. 
  23.112 -         */
  23.113 -        ar = get_active_req();
  23.114 -        ar->id = req_s->id;
  23.115 -        req_s->id = ACTIVE_IDX(ar);
  23.116 -        DPRINTK("%3lu < %3lu\n", req_s->id, ar->id);
  23.117 -
  23.118 -        /* FE -> BE interposition point is here. */
  23.119 -        
  23.120 -        /* ------------------------------------------------------------- */
  23.121 -        /* BLKIF_OP_PROBE_HACK:                                          */
   23.122 -        /* Until we have grant tables, we need to allow the backend to  */
   23.123 -        /* map pages that are either from this domain, or more commonly  */
   23.124 -        /* from the real front end.  We achieve this in a terrible way,  */
   23.125 -        /* by passing the front end's domid along with PROBE messages    */
  23.126 -        /* Once grant tables appear, this should all go away.            */
  23.127 -
  23.128 -        if (req_s->operation == BLKIF_OP_PROBE) {
  23.129 -            DPRINTK("Adding FE domid to PROBE request.\n");
   23.130 -            req_s->frame_and_sects[1] = (unsigned long)ptfe_blkif.domid;
  23.131 -        }
  23.132 -
  23.133 -        /* ------------------------------------------------------------- */
  23.134 -
  23.135 -        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
  23.136 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  23.137 -             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
  23.138 -            
  23.139 -            /* Copy the response message to UFERing */
  23.140 -            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
  23.141 -            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
  23.142 -
  23.143 -            /* XXX: mapping/copying of attached pages is still not done! */
  23.144 -
  23.145 -            DPRINTK("req->UFERing\n"); 
  23.146 -            blktap_write_fe_ring(req_s);
  23.147 -
  23.148 -
  23.149 -        }
  23.150 -
  23.151 -        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
  23.152 -        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  23.153 -               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
  23.154 -            
   23.155 -            /* (be included to prevent noise from the fe when it's off) */
  23.156 -            /* copy the request message to the BERing */
  23.157 -
  23.158 -            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
  23.159 -                    (unsigned)MASK_BLKIF_IDX(i), 
  23.160 -                    (unsigned)MASK_BLKIF_IDX(ptbe_req_prod));
  23.161 -
  23.162 -            req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
  23.163 -            
  23.164 -            memcpy(req_d, req_s, sizeof(blkif_request_t));
  23.165 -
  23.166 -            ptbe_req_prod++;
  23.167 -        }
  23.168 -    }
  23.169 -
  23.170 -    ptfe_blkif.blk_req_cons = i;
  23.171 -
   23.172 -    /* If we have forwarded any requests, notify the appropriate ends. */
  23.173 -    if (notify) {
  23.174 -
  23.175 -        /* we have sent stuff to the be, notify it. */
  23.176 -        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  23.177 -               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
  23.178 -            wmb();
  23.179 -            blk_ptbe_ring->req_prod = ptbe_req_prod;
  23.180 -
  23.181 -            notify_via_evtchn(blkif_ptbe_evtchn);
  23.182 -            DPRINTK(" -- and notified.\n");
  23.183 -        }
  23.184 -
  23.185 -        /* we sent stuff to the app, notify it. */
  23.186 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
  23.187 -             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
  23.188 -
  23.189 -            blktap_kick_user();
  23.190 -        }
  23.191 -    }
  23.192 -
  23.193 -    /* unlock rings */
  23.194 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
  23.195 -
  23.196 -    return IRQ_HANDLED;
  23.197 -}
  23.198 -
  23.199 -inline int write_req_to_be_ring(blkif_request_t *req)
  23.200 -{
  23.201 -    blkif_request_t *req_d;
  23.202 -
  23.203 -    req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
  23.204 -    memcpy(req_d, req, sizeof(blkif_request_t));
  23.205 -    ptbe_req_prod++;
  23.206 -
  23.207 -    return 0;
  23.208 -}
  23.209 -
  23.210 -inline void kick_be_domain(void) {
  23.211 -    wmb();
  23.212 -    blk_ptbe_ring->req_prod = ptbe_req_prod;
  23.213 -    notify_via_evtchn(blkif_ptbe_evtchn);
  23.214 -}
  23.215 -
  23.216 -/*-----[ Data to/from Backend (server) VM ]------------------------------*/
  23.217 -
  23.218 -
  23.219 -irqreturn_t blkif_ptbe_int(int irq, void *dev_id, 
  23.220 -                                  struct pt_regs *ptregs)
  23.221 -{
  23.222 -    blkif_response_t  *resp_s, *resp_d;
  23.223 -    BLKIF_RING_IDX be_rp;
  23.224 -    unsigned long flags;
  23.225 -    int notify;
  23.226 -    unsigned long i;
  23.227 -    active_req_t *ar;
  23.228 -
  23.229 -    DPRINTK("PT got BE interrupt.\n");
  23.230 -
  23.231 -    /* lock both rings */
  23.232 -    spin_lock_irqsave(&blkif_io_lock, flags);
  23.233 -    
  23.234 -    /* While there are RESPONSES on BERing: */
  23.235 -    be_rp = blk_ptbe_ring->resp_prod;
  23.236 -    rmb();
  23.237 -    notify = (ptbe_resp_cons != be_rp);
  23.238 -    
  23.239 -    for ( i = ptbe_resp_cons; i != be_rp; i++ )
  23.240 -    {
  23.241 -        /* BE -> FE interposition point is here. */
  23.242 -        
  23.243 -        /* Get the next response */
  23.244 -        resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp;
  23.245 -    
  23.246 -       
  23.247 -        /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
  23.248 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  23.249 -             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
  23.250 -
  23.251 -            /* Copy the response message to UBERing */
  23.252 -            /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
  23.253 -            /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
  23.254 -
  23.255 -            /* XXX: copy/map the attached page! */
  23.256 -
  23.257 -            DPRINTK("rsp->UBERing\n"); 
  23.258 -            blktap_write_be_ring(resp_s);
  23.259 -
  23.260 -        }
  23.261 -       
  23.262 -        /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
  23.263 -        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  23.264 -               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
  23.265 -            
  23.266 -            /* (fe included to prevent random interference from the BE) */
  23.267 -            /* Copy the response message to FERing */
  23.268 -         
  23.269 -            DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
  23.270 -                    (unsigned) MASK_BLKIF_IDX(i), 
  23.271 -                    (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod));
  23.272 -
  23.273 -            /* remap id, and free the active req. blkif lookup goes here too.*/
  23.274 -            ar = &active_reqs[resp_s->id];
  23.275 -            DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id);
  23.276 -            resp_s->id = ar->id;
  23.277 -            free_active_req(ar);
  23.278 -           
  23.279 -            resp_d = &ptfe_blkif.blk_ring_base->ring[
  23.280 -                MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
  23.281 -
  23.282 -            memcpy(resp_d, resp_s, sizeof(blkif_response_t));
  23.283 -            
  23.284 -            ptfe_blkif.blk_resp_prod++;
  23.285 -
  23.286 -        }
  23.287 -    }
  23.288 -
  23.289 -    ptbe_resp_cons = i;
  23.290 -    
   23.291 -    /* If we have forwarded any responses, notify the appropriate domains. */
  23.292 -    if (notify) {
  23.293 -
  23.294 -        /* we have sent stuff to the fe.  notify it. */
  23.295 -        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  23.296 -               (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
  23.297 -            wmb();
  23.298 -            ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
  23.299 -        
  23.300 -            notify_via_evtchn(ptfe_blkif.evtchn);
  23.301 -            DPRINTK(" -- and notified.\n");
  23.302 -        }
  23.303 -
  23.304 -        /* we sent stuff to the app, notify it. */
  23.305 -        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
  23.306 -             (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
  23.307 -
  23.308 -            blktap_kick_user();
  23.309 -        }
  23.310 -    }
  23.311 -
  23.312 -    spin_unlock_irqrestore(&blkif_io_lock, flags);
  23.313 -    return IRQ_HANDLED;
  23.314 -}
  23.315 -
  23.316 -inline int write_resp_to_fe_ring(blkif_response_t *rsp)
  23.317 -{
  23.318 -    blkif_response_t *resp_d;
  23.319 -    active_req_t *ar;
  23.320 -    
  23.321 -    /* remap id, and free the active req. blkif lookup goes here too.*/
  23.322 -    ar = &active_reqs[rsp->id];
  23.323 -    DPRINTK("%3lu > %3lu\n", rsp->id, ar->id);
  23.324 -    rsp->id = ar->id;
  23.325 -    free_active_req(ar);
  23.326 -            
  23.327 -    resp_d = &ptfe_blkif.blk_ring_base->ring[
  23.328 -        MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
  23.329 -
  23.330 -    memcpy(resp_d, rsp, sizeof(blkif_response_t));
  23.331 -    ptfe_blkif.blk_resp_prod++;
  23.332 -
  23.333 -    return 0;
  23.334 -}
  23.335 -
  23.336 -inline void kick_fe_domain(void) {
  23.337 -    wmb();
  23.338 -    ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
  23.339 -    notify_via_evtchn(ptfe_blkif.evtchn);
  23.340 -    
  23.341 -}
  23.342 -
  23.343 -static inline void flush_requests(void)
  23.344 -{
  23.345 -    wmb(); /* Ensure that the frontend can see the requests. */
  23.346 -    blk_ptbe_ring->req_prod = ptbe_req_prod;
  23.347 -    notify_via_evtchn(blkif_ptbe_evtchn);
  23.348 -}
  23.349 -
  23.350 -/*-----[ Data to/from user space ]----------------------------------------*/
  23.351 -
  23.352 -
  23.353 -int blktap_write_fe_ring(blkif_request_t *req)
  23.354 -{
  23.355 -    blkif_request_t *target;
  23.356 -    int error, i;
  23.357 -
  23.358 -    /*
  23.359 -     * This is called to pass a request from the real frontend domain's
  23.360 -     * blkif ring to the character device.
  23.361 -     */
  23.362 -
  23.363 -    if ( ! blktap_ring_ok ) {
  23.364 -        DPRINTK("blktap: fe_ring not ready for a request!\n");
  23.365 -        return 0;
  23.366 -    }
  23.367 -
  23.368 -    if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) {
  23.369 -        DPRINTK("blktap: fe_ring is full, can't add.\n");
  23.370 -        return 0;
  23.371 -    }
  23.372 -
  23.373 -    target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req;
  23.374 -    memcpy(target, req, sizeof(*req));
  23.375 -
   23.376 -/* maybe move this stuff out into a separate func ------------------- */
  23.377 -
  23.378 -    /*
  23.379 -     * For now, map attached page into a fixed position into the vma.
  23.380 -     * XXX: make this map to a free page.
  23.381 -     */
  23.382 -
  23.383 -    /* Attempt to map the foreign pages directly in to the application */
  23.384 -    for (i=0; i<target->nr_segments; i++) {
  23.385 -
  23.386 -        /* get an unused virtual address from the char device */
  23.387 -        /* store the old page address */
  23.388 -        /* replace the address with the virtual address */
  23.389 -
  23.390 -        /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */
  23.391 -
  23.392 -        error = direct_remap_area_pages(blktap_vma->vm_mm, 
  23.393 -                                        MMAP_VADDR(req->id, i), 
   23.394 -                                        target->frame_and_sects[i] & PAGE_MASK,
  23.395 -                                        PAGE_SIZE,
  23.396 -                                        blktap_vma->vm_page_prot,
  23.397 -                                        ptfe_blkif.domid);
  23.398 -        if ( error != 0 ) {
  23.399 -            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
  23.400 -            return 0;
  23.401 -        }
  23.402 -    }
  23.403 -    /* fix the address of the attached page in the message. */
  23.404 -    /* TODO:      preserve the segment number stuff here... */
  23.405 -    /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/
  23.406 -/* ------------------------------------------------------------------ */
  23.407 -
  23.408 -    
  23.409 -    fe_ring.req_prod++;
  23.410 -
  23.411 -    return 0;
  23.412 -}
  23.413 -
  23.414 -int blktap_write_be_ring(blkif_response_t *rsp)
  23.415 -{
  23.416 -    blkif_response_t *target;
  23.417 -
  23.418 -    /*
  23.419 -     * This is called to pass a request from the real backend domain's
  23.420 -     * blkif ring to the character device.
  23.421 -     */
  23.422 -
  23.423 -    if ( ! blktap_ring_ok ) {
  23.424 -        DPRINTK("blktap: be_ring not ready for a request!\n");
  23.425 -        return 0;
  23.426 -    }
  23.427 -
  23.428 -    if ( BLKTAP_RING_FULL(RING(&be_ring)) ) {
  23.429 -        DPRINTK("blktap: be_ring is full, can't add.\n");
  23.430 -        return 0;
  23.431 -    }
  23.432 -
  23.433 -    target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp;
  23.434 -    memcpy(target, rsp, sizeof(*rsp));
  23.435 -
  23.436 -
   23.437 -    /* XXX: map attached pages and fix up addresses in the copied message. */
  23.438 -
  23.439 -    be_ring.rsp_prod++;
  23.440 -
  23.441 -    return 0;
  23.442 -}
  23.443 -
  23.444 -int blktap_read_fe_ring(void)
  23.445 -{
  23.446 -    /* This is called to read responses from the UFE ring. */
  23.447 -
  23.448 -    BLKIF_RING_IDX fe_rp;
  23.449 -    unsigned long i;
  23.450 -    int notify;
  23.451 -
  23.452 -    DPRINTK("blktap_read_fe_ring()\n");
  23.453 -
  23.454 -    fe_rp = fe_ring.ring->resp_prod;
  23.455 -    rmb();
  23.456 -    notify = (fe_rp != fe_ring.rsp_cons);
  23.457 -
   23.458 -    /* if we are forwarding from UFERing to FERing */
  23.459 -    if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) {
  23.460 -
  23.461 -        /* for each outstanding message on the UFEring  */
  23.462 -        for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) {
  23.463 -
  23.464 -            /* XXX: remap pages on that message as necessary */
   23.465 -            /* copy the message to the FERing */
  23.466 -
  23.467 -            DPRINTK("resp->fe_ring\n");
  23.468 -            write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp);
  23.469 -        }
  23.470 -    
  23.471 -        fe_ring.rsp_cons = fe_rp;
  23.472 -
  23.473 -        /* notify the fe if necessary */
  23.474 -        if ( notify ) {
  23.475 -            DPRINTK("kick_fe_domain()\n");
  23.476 -            kick_fe_domain();
  23.477 -        }
  23.478 -    }
  23.479 -
  23.480 -    return 0;
  23.481 -}
  23.482 -
  23.483 -int blktap_read_be_ring(void)
  23.484 -{
  23.485 -    /* This is called to read responses from the UBE ring. */
  23.486 -
  23.487 -    BLKIF_RING_IDX be_rp;
  23.488 -    unsigned long i;
  23.489 -    int notify;
  23.490 -
  23.491 -    DPRINTK("blktap_read_be_ring()\n");
  23.492 -
  23.493 -    be_rp = be_ring.ring->req_prod;
  23.494 -    rmb();
  23.495 -    notify = (be_rp != be_ring.req_cons);
  23.496 -
   23.497 -    /* if we are forwarding from UBERing to BERing */
  23.498 -    if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) {
  23.499 -
   23.500 -        /* for each outstanding message on the UBEring  */
  23.501 -        for ( i = be_ring.req_cons; i != be_rp; i++ ) {
  23.502 -
  23.503 -            /* XXX: remap pages on that message as necessary */
   23.504 -            /* copy the message to the BERing */
  23.505 -
  23.506 -            DPRINTK("req->be_ring\n");
  23.507 -            write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req);
  23.508 -        }
  23.509 -    
  23.510 -        be_ring.req_cons = be_rp;
  23.511 -
   23.512 -        /* notify the be if necessary */
  23.513 -        if ( notify ) {
  23.514 -            DPRINTK("kick_be_domain()\n");
  23.515 -            kick_be_domain();
  23.516 -        }
  23.517 -    }
  23.518 -
  23.519 -    return 0;
  23.520 -}
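
Every ring hop in this file follows the same free-running-index discipline: indices only ever increase and are masked into the ring on access; a consumer snapshots the peer's producer index and issues rmb() before reading entries, while a producer issues wmb() before publishing its private index and kicking the event channel. In outline (the names below are generic stand-ins, not from the source):

    /* Generic sketch of the consume/publish pattern used above. */
    static BLKIF_RING_IDX priv_cons, priv_prod;   /* this end's indices  */
    static unsigned int   evtchn;                 /* channel to the peer */

    static void consume_requests(blkif_ring_t *ring,
                                 void (*handle)(blkif_request_t *))
    {
        BLKIF_RING_IDX prod = ring->req_prod, i;  /* snapshot producer */

        rmb();                      /* read index before reading entries */
        for (i = priv_cons; i != prod; i++)
            handle(&ring->ring[MASK_BLKIF_IDX(i)].req);
        priv_cons = i;
    }

    static void publish_requests(blkif_ring_t *ring)
    {
        wmb();                      /* make entries visible before index */
        ring->req_prod = priv_prod;
        notify_via_evtchn(evtchn);  /* wake the other end */
    }

Because the indices are free-running, the ring is full exactly when prod - cons == BLKIF_RING_SIZE, which is the test BLKTAP_RING_FULL() performs above.
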
    24.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Mon Dec 27 10:12:02 2004 +0000
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,243 +0,0 @@
    24.4 -/******************************************************************************
    24.5 - * blktap_userdev.c
    24.6 - * 
    24.7 - * XenLinux virtual block-device tap.
    24.8 - * Control interface between the driver and a character device.
    24.9 - * 
   24.10 - * Copyright (c) 2004, Andrew Warfield
   24.11 - *
   24.12 - */
   24.13 -
   24.14 -#include <linux/config.h>
   24.15 -#include <linux/module.h>
   24.16 -#include <linux/kernel.h>
   24.17 -#include <linux/fs.h>
   24.18 -#include <linux/mm.h>
   24.19 -#include <linux/miscdevice.h>
   24.20 -#include <linux/errno.h>
   24.21 -#include <linux/major.h>
   24.22 -#include <linux/gfp.h>
   24.23 -#include <linux/poll.h>
   24.24 -#include <asm/pgalloc.h>
   24.25 -
   24.26 -#include "blktap.h"
   24.27 -
   24.28 -
   24.29 -unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH;
   24.30 -
   24.31 -/* Only one process may open /dev/xen/blktap at any time. */
   24.32 -static unsigned long blktap_dev_inuse;
   24.33 -unsigned long blktap_ring_ok; /* make this ring->state */
   24.34 -
   24.35 -/* for poll: */
   24.36 -static wait_queue_head_t blktap_wait;
   24.37 -
   24.38 -/* Where things are inside the device mapping. */
   24.39 -struct vm_area_struct *blktap_vma;
   24.40 -unsigned long mmap_vstart;
   24.41 -unsigned long rings_vstart;
   24.42 -
   24.43 -/* -------[ blktap vm ops ]------------------------------------------- */
   24.44 -
   24.45 -static struct page *blktap_nopage(struct vm_area_struct *vma,
   24.46 -                                             unsigned long address,
   24.47 -                                             int *type)
   24.48 -{
   24.49 -    /*
   24.50 -     * if the page has not been mapped in by the driver then generate
   24.51 -     * a SIGBUS to the domain.
   24.52 -     */
   24.53 -
   24.54 -    force_sig(SIGBUS, current);
   24.55 -
   24.56 -    return 0;
   24.57 -}
   24.58 -
   24.59 -struct vm_operations_struct blktap_vm_ops = {
   24.60 -    nopage:   blktap_nopage,
   24.61 -};
   24.62 -
   24.63 -/* -------[ blktap file ops ]----------------------------------------- */
   24.64 -
   24.65 -static int blktap_open(struct inode *inode, struct file *filp)
   24.66 -{
   24.67 -    if ( test_and_set_bit(0, &blktap_dev_inuse) )
   24.68 -        return -EBUSY;
   24.69 -
   24.70 -    printk(KERN_ALERT "blktap open.\n");
   24.71 -
   24.72 -    /* Allocate the fe ring. */
   24.73 -    fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
   24.74 -    if (fe_ring.ring == NULL)
   24.75 -        goto fail_nomem;
   24.76 -
   24.77 -    SetPageReserved(virt_to_page(fe_ring.ring));
   24.78 -    
   24.79 -    fe_ring.ring->req_prod = fe_ring.ring->resp_prod
   24.80 -                           = fe_ring.req_prod
   24.81 -                           = fe_ring.rsp_cons
   24.82 -                           = 0;
   24.83 -
   24.84 -    /* Allocate the be ring. */
   24.85 -    be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
   24.86 -    if (be_ring.ring == NULL)
   24.87 -        goto fail_free_fe;
   24.88 -
   24.89 -    SetPageReserved(virt_to_page(be_ring.ring));
   24.90 -    
   24.91 -    be_ring.ring->req_prod = be_ring.ring->resp_prod
   24.92 -                           = be_ring.rsp_prod
   24.93 -                           = be_ring.req_cons
   24.94 -                           = 0;
   24.95 -
    24.96 -    DPRINTK("blktap open.\n");
   24.97 -
   24.98 -    return 0;
   24.99 -
  24.100 - fail_free_fe:
  24.101 -    free_page( (unsigned long) fe_ring.ring);
  24.102 -
  24.103 - fail_nomem:
  24.104 -    return -ENOMEM;
  24.105 -}
  24.106 -
  24.107 -static int blktap_release(struct inode *inode, struct file *filp)
  24.108 -{
  24.109 -    blktap_dev_inuse = 0;
  24.110 -    blktap_ring_ok = 0;
  24.111 -
  24.112 -    printk(KERN_ALERT "blktap closed.\n");
  24.113 -
  24.114 -    /* Free the ring page. */
  24.115 -    ClearPageReserved(virt_to_page(fe_ring.ring));
  24.116 -    free_page((unsigned long) fe_ring.ring);
  24.117 -
  24.118 -    ClearPageReserved(virt_to_page(be_ring.ring));
  24.119 -    free_page((unsigned long) be_ring.ring);
  24.120 -    
  24.121 -    return 0;
  24.122 -}
  24.123 -
  24.124 -static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
  24.125 -{
  24.126 -    int size;
  24.127 -
  24.128 -    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
  24.129 -           vma->vm_start, vma->vm_end);
  24.130 -
  24.131 -    vma->vm_ops = &blktap_vm_ops;
  24.132 -
  24.133 -    size = vma->vm_end - vma->vm_start;
  24.134 -    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
  24.135 -        printk(KERN_INFO 
  24.136 -               "blktap: you _must_ map exactly %d pages!\n",
  24.137 -               MMAP_PAGES + RING_PAGES);
  24.138 -        return -EAGAIN;
  24.139 -    }
  24.140 -
  24.141 -    size >>= PAGE_SHIFT;
  24.142 -    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
  24.143 -    
  24.144 -    rings_vstart = vma->vm_start;
  24.145 -    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
  24.146 -    
  24.147 -    /* Map the ring pages to the start of the region and reserve it. */
  24.148 -
  24.149 -    /* not sure if I really need to do this... */
  24.150 -    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  24.151 -
  24.152 -    DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring));
  24.153 -    if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE, 
  24.154 -                         vma->vm_page_prot)) {
  24.155 -        printk(KERN_ERR "be_ring: remap_page_range failure!\n");
  24.156 -    }
  24.157 -
  24.158 -    DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring));
  24.159 -    if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring), 
  24.160 -                         PAGE_SIZE, vma->vm_page_prot)) {
  24.161 -        printk(KERN_ERR "fe_ring: remap_page_range failure!\n");
  24.162 -    }
  24.163 -
  24.164 -    blktap_vma = vma;
  24.165 -    blktap_ring_ok = 1;
  24.166 -
  24.167 -    return 0;
  24.168 -}
  24.169 -
  24.170 -static int blktap_ioctl(struct inode *inode, struct file *filp,
  24.171 -                        unsigned int cmd, unsigned long arg)
  24.172 -{
  24.173 -    switch(cmd) {
  24.174 -    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
  24.175 -        return blktap_read_fe_ring();
  24.176 -
  24.177 -    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
  24.178 -        return blktap_read_be_ring();
  24.179 -
  24.180 -    case BLKTAP_IOCTL_SETMODE:
  24.181 -        if (BLKTAP_MODE_VALID(arg)) {
  24.182 -            blktap_mode = arg;
  24.183 -            /* XXX: may need to flush rings here. */
  24.184 -            printk(KERN_INFO "blktap: set mode to %lx\n", arg);
  24.185 -            return 0;
  24.186 -        }
  24.187 -        /* XXX: return a more meaningful error case here. */
  24.188 -    }
  24.189 -    return -ENOIOCTLCMD;
  24.190 -}
  24.191 -
  24.192 -static unsigned int blktap_poll(struct file *file, poll_table *wait)
  24.193 -{
  24.194 -        poll_wait(file, &blktap_wait, wait);
  24.195 -
  24.196 -        if ( (fe_ring.req_prod != fe_ring.ring->req_prod) ||
  24.197 -             (be_ring.rsp_prod != be_ring.ring->resp_prod) ) {
  24.198 -
  24.199 -            fe_ring.ring->req_prod = fe_ring.req_prod;
  24.200 -            be_ring.ring->resp_prod = be_ring.rsp_prod;
  24.201 -            return POLLIN | POLLRDNORM;
  24.202 -        }
  24.203 -
  24.204 -        return 0;
  24.205 -}
  24.206 -
  24.207 -void blktap_kick_user(void)
  24.208 -{
  24.209 -    /* blktap_ring->req_prod = blktap_req_prod; */
  24.210 -    wake_up_interruptible(&blktap_wait);
  24.211 -}
  24.212 -
  24.213 -static struct file_operations blktap_fops = {
  24.214 -    owner:    THIS_MODULE,
  24.215 -    poll:     blktap_poll,
  24.216 -    ioctl:    blktap_ioctl,
  24.217 -    open:     blktap_open,
  24.218 -    release:  blktap_release,
  24.219 -    mmap:     blktap_mmap,
  24.220 -};
  24.221 -
  24.222 -/* -------[ blktap module setup ]------------------------------------- */
  24.223 -
  24.224 -static struct miscdevice blktap_miscdev = {
  24.225 -    .minor        = BLKTAP_MINOR,
  24.226 -    .name         = "blktap",
  24.227 -    .fops         = &blktap_fops,
  24.228 -    .devfs_name   = "misc/blktap",
  24.229 -};
  24.230 -
  24.231 -int blktap_init(void)
  24.232 -{
  24.233 -    int err;
  24.234 -
  24.235 -    err = misc_register(&blktap_miscdev);
  24.236 -    if ( err != 0 )
  24.237 -    {
  24.238 -        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
  24.239 -        return err;
  24.240 -    }
  24.241 -
  24.242 -    init_waitqueue_head(&blktap_wait);
  24.243 -
  24.244 -
  24.245 -    return 0;
  24.246 -}
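
The file removed above was the user-space face of the block tap: a misc character device (/dev/misc/blktap) whose client mmaps the two ring pages followed by the data pages, sleeps in poll() until a producer index moves, and then issues BLKTAP_IOCTL_KICK_FE / BLKTAP_IOCTL_KICK_BE so the driver drains the corresponding ring. Note that blktap_mmap() only printks remap_page_range() failures and still returns 0, so a failed ring mapping was reported to the caller as success. A minimal sketch of a client of this interface follows; the ioctl numbers and the MMAP_PAGES value are placeholders standing in for the real definitions in blktap.h (not shown here):

    /* Sketch of a blktap user-space client.  BLKTAP_IOCTL_* values and
     * MMAP_PAGES are assumptions standing in for the blktap.h constants. */
    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define BLKTAP_IOCTL_KICK_FE 1          /* placeholder value */
    #define BLKTAP_IOCTL_KICK_BE 2          /* placeholder value */
    #define RING_PAGES           2          /* be ring page + fe ring page */
    #define MMAP_PAGES           128        /* placeholder value */

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);
        int fd = open("/dev/misc/blktap", O_RDWR);
        if (fd < 0) { perror("open"); return 1; }

        /* blktap_mmap() rejects anything but exactly this many pages:
         * the ring pages first, the data pages after them. */
        size_t len = (size_t)(RING_PAGES + MMAP_PAGES) * page;
        char *area = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (area == MAP_FAILED) { perror("mmap"); return 1; }
        /* area            : be ring page
         * area + page     : fe ring page
         * area + 2 * page : data pages */

        for (;;) {
            struct pollfd pfd = { .fd = fd, .events = POLLIN };
            if (poll(&pfd, 1, -1) <= 0)
                break;
            /* A producer index moved: have the driver drain both rings. */
            ioctl(fd, BLKTAP_IOCTL_KICK_FE, 0);
            ioctl(fd, BLKTAP_IOCTL_KICK_BE, 0);
        }
        munmap(area, len);
        close(fd);
        return 0;
    }
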
    25.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/hardirq.h	Mon Dec 27 10:12:02 2004 +0000
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,61 +0,0 @@
    25.4 -#ifndef __ASM_HARDIRQ_H
    25.5 -#define __ASM_HARDIRQ_H
    25.6 -
    25.7 -#include <linux/config.h>
    25.8 -#include <linux/threads.h>
    25.9 -#include <linux/irq.h>
   25.10 -
   25.11 -typedef struct {
   25.12 -	unsigned int __softirq_pending;
   25.13 -	unsigned long idle_timestamp;
   25.14 -	unsigned int __nmi_count;	/* arch dependent */
   25.15 -	unsigned int apic_timer_irqs;	/* arch dependent */
   25.16 -} ____cacheline_aligned irq_cpustat_t;
   25.17 -
   25.18 -#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
   25.19 -
   25.20 -/*
   25.21 - * We put the hardirq and softirq counter into the preemption
   25.22 - * counter. The bitmask has the following meaning:
   25.23 - *
   25.24 - * - bits 0-7 are the preemption count (max preemption depth: 256)
   25.25 - * - bits 8-15 are the softirq count (max # of softirqs: 256)
   25.26 - * - bits 16-24 are the hardirq count (max # of hardirqs: 512)
   25.27 - *
   25.28 - * - ( bit 26 is the PREEMPT_ACTIVE flag. )
   25.29 - *
   25.30 - * PREEMPT_MASK: 0x000000ff
   25.31 - * SOFTIRQ_MASK: 0x0000ff00
   25.32 - * HARDIRQ_MASK: 0x01ff0000
   25.33 - */
   25.34 -
   25.35 -#define PREEMPT_BITS	8
   25.36 -#define SOFTIRQ_BITS	8
   25.37 -#define HARDIRQ_BITS	9
   25.38 -
   25.39 -#define PREEMPT_SHIFT	0
   25.40 -#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
   25.41 -#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)
   25.42 -
   25.43 -/*
   25.44 - * The hardirq mask has to be large enough to have
   25.45 - * space for potentially all IRQ sources in the system
   25.46 - * nesting on a single CPU:
   25.47 - */
   25.48 -#if (1 << HARDIRQ_BITS) < NR_IRQS
   25.49 -# error HARDIRQ_BITS is too low!
   25.50 -#endif
   25.51 -
   25.52 -#define nmi_enter()		(irq_enter())
   25.53 -#define nmi_exit()		(preempt_count() -= HARDIRQ_OFFSET)
   25.54 -
   25.55 -#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
   25.56 -#define irq_exit()							\
   25.57 -do {									\
   25.58 -		preempt_count() -= IRQ_EXIT_OFFSET;			\
   25.59 -		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
   25.60 -			do_softirq();					\
   25.61 -		preempt_enable_no_resched();				\
   25.62 -} while (0)
   25.63 -
   25.64 -#endif /* __ASM_HARDIRQ_H */
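
The layout comment in the removed hardirq.h packs three counters into preempt_count: 8 preemption bits, 8 softirq bits and 9 hardirq bits, and the #if guard rejects configurations where NR_IRQS could overflow the 9-bit hardirq field. The quoted masks follow mechanically from the shift definitions; a standalone sketch (compile-time asserts via the negative-array-size trick, written for this note rather than taken from the tree):

    /* Deriving PREEMPT_MASK/SOFTIRQ_MASK/HARDIRQ_MASK from the bit
     * counts above.  A negative array size fails to compile, so these
     * typedefs act as pre-C11 static asserts. */
    #define PREEMPT_BITS  8
    #define SOFTIRQ_BITS  8
    #define HARDIRQ_BITS  9

    #define PREEMPT_SHIFT 0
    #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)   /*  8 */
    #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)   /* 16 */

    #define MASK(bits, shift) (((1UL << (bits)) - 1) << (shift))

    typedef char preempt_mask_ok[MASK(PREEMPT_BITS, PREEMPT_SHIFT) == 0x000000ffUL ? 1 : -1];
    typedef char softirq_mask_ok[MASK(SOFTIRQ_BITS, SOFTIRQ_SHIFT) == 0x0000ff00UL ? 1 : -1];
    typedef char hardirq_mask_ok[MASK(HARDIRQ_BITS, HARDIRQ_SHIFT) == 0x01ff0000UL ? 1 : -1];
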
    26.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h	Mon Dec 27 10:12:02 2004 +0000
    26.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.3 @@ -1,59 +0,0 @@
    26.4 -/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws
    26.5 - * which needs to alter them. */
    26.6 -
    26.7 -static inline void smpboot_clear_io_apic_irqs(void)
    26.8 -{
    26.9 -#if 1
   26.10 -	printk("smpboot_clear_io_apic_irqs\n");
   26.11 -#else
   26.12 -	io_apic_irqs = 0;
   26.13 -#endif
   26.14 -}
   26.15 -
   26.16 -static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
   26.17 -{
   26.18 -#if 1
   26.19 -	printk("smpboot_setup_warm_reset_vector\n");
   26.20 -#else
   26.21 -	CMOS_WRITE(0xa, 0xf);
   26.22 -	local_flush_tlb();
   26.23 -	Dprintk("1.\n");
   26.24 -	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
   26.25 -	Dprintk("2.\n");
   26.26 -	*((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
   26.27 -	Dprintk("3.\n");
   26.28 -#endif
   26.29 -}
   26.30 -
   26.31 -static inline void smpboot_restore_warm_reset_vector(void)
   26.32 -{
   26.33 -	/*
   26.34 -	 * Install writable page 0 entry to set BIOS data area.
   26.35 -	 */
   26.36 -	local_flush_tlb();
   26.37 -
   26.38 -	/*
   26.39 -	 * Paranoid:  Set warm reset code and vector here back
   26.40 -	 * to default values.
   26.41 -	 */
   26.42 -	CMOS_WRITE(0, 0xf);
   26.43 -
   26.44 -	*((volatile long *) phys_to_virt(0x467)) = 0;
   26.45 -}
   26.46 -
   26.47 -static inline void smpboot_setup_io_apic(void)
   26.48 -{
   26.49 -#if 1
   26.50 -	printk("smpboot_setup_io_apic\n");
   26.51 -#else
   26.52 -	/*
   26.53 -	 * Here we can be sure that there is an IO-APIC in the system. Let's
   26.54 -	 * go and set it up:
   26.55 -	 */
   26.56 -	if (!skip_ioapic_setup && nr_ioapics)
   26.57 -		setup_IO_APIC();
   26.58 -#endif
   26.59 -}
   26.60 -
   26.61 -
   26.62 -#define	smp_found_config	(HYPERVISOR_shared_info->n_vcpu > 1)
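
The hooks in the removed smpboot_hooks.h let the generic smpboot.c run unchanged on platforms without the PC warm-reset/IO-APIC bring-up path: under Xen the CMOS warm-reset vector, the trampoline writes and the IO-APIC setup are compiled out in favour of printk stubs, and smp_found_config is derived from the hypervisor's VCPU count rather than an MP-table scan. A self-contained sketch of that last idea (the struct is illustrative, not the real shared_info layout):

    /* Why smp_found_config can be a one-line expression under Xen:
     * the hypervisor already knows the VCPU count, so there is no
     * MP table to parse.  Illustrative types only. */
    struct shared_info_sketch { unsigned int n_vcpu; };
    static struct shared_info_sketch *HYPERVISOR_shared_info;

    static inline int xen_smp_found_config(void)
    {
        return HYPERVISOR_shared_info->n_vcpu > 1;  /* >1 VCPU => SMP */
    }
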
    27.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/spinlock.h	Mon Dec 27 10:12:02 2004 +0000
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,224 +0,0 @@
    27.4 -#ifndef __ASM_SPINLOCK_H
    27.5 -#define __ASM_SPINLOCK_H
    27.6 -
    27.7 -#include <asm/atomic.h>
    27.8 -#include <asm/rwlock.h>
    27.9 -#include <asm/page.h>
   27.10 -#include <linux/config.h>
   27.11 -#include <linux/compiler.h>
   27.12 -
   27.13 -asmlinkage int printk(const char * fmt, ...)
   27.14 -	__attribute__ ((format (printf, 1, 2)));
   27.15 -
   27.16 -/*
   27.17 - * Your basic SMP spinlocks, allowing only a single CPU anywhere
   27.18 - */
   27.19 -
   27.20 -typedef struct {
   27.21 -	volatile unsigned int lock;
   27.22 -#ifdef CONFIG_DEBUG_SPINLOCK
   27.23 -	unsigned magic;
   27.24 -#endif
   27.25 -} spinlock_t;
   27.26 -
   27.27 -#define SPINLOCK_MAGIC	0xdead4ead
   27.28 -
   27.29 -#ifdef CONFIG_DEBUG_SPINLOCK
   27.30 -#define SPINLOCK_MAGIC_INIT	, SPINLOCK_MAGIC
   27.31 -#else
   27.32 -#define SPINLOCK_MAGIC_INIT	/* */
   27.33 -#endif
   27.34 -
   27.35 -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
   27.36 -
   27.37 -#define spin_lock_init(x)	do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
   27.38 -
   27.39 -/*
    27.40 - * Simple spin lock operations.  There are two variants, one clears IRQs
   27.41 - * on the local processor, one does not.
   27.42 - *
   27.43 - * We make no fairness assumptions. They have a cost.
   27.44 - */
   27.45 -
   27.46 -#define spin_is_locked(x)	(*(volatile signed char *)(&(x)->lock) <= 0)
   27.47 -#define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
   27.48 -
   27.49 -#define spin_lock_string \
   27.50 -	"\n1:\t" \
   27.51 -	"lock ; decb %0\n\t" \
   27.52 -	"jns 3f\n" \
   27.53 -	"2:\t" \
   27.54 -	"rep;nop\n\t" \
   27.55 -	"cmpb $0,%0\n\t" \
   27.56 -	"jle 2b\n\t" \
   27.57 -	"jmp 1b\n" \
   27.58 -	"3:\n\t"
   27.59 -
   27.60 -#define spin_lock_string_flags \
   27.61 -	"\n1:\t" \
   27.62 -	"lock ; decb %0\n\t" \
   27.63 -	"jns 4f\n\t" \
   27.64 -	"2:\t" \
   27.65 -	"testl $0x200, %1\n\t" \
   27.66 -	"jz 3f\n\t" \
   27.67 -	"#sti\n\t" \
   27.68 -	"3:\t" \
   27.69 -	"rep;nop\n\t" \
   27.70 -	"cmpb $0, %0\n\t" \
   27.71 -	"jle 3b\n\t" \
   27.72 -	"#cli\n\t" \
   27.73 -	"jmp 1b\n" \
   27.74 -	"4:\n\t"
   27.75 -
   27.76 -/*
   27.77 - * This works. Despite all the confusion.
   27.78 - * (except on PPro SMP or if we are using OOSTORE)
   27.79 - * (PPro errata 66, 92)
   27.80 - */
   27.81 - 
   27.82 -#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
   27.83 -
   27.84 -#define spin_unlock_string \
   27.85 -	"movb $1,%0" \
   27.86 -		:"=m" (lock->lock) : : "memory"
   27.87 -
   27.88 -
   27.89 -static inline void _raw_spin_unlock(spinlock_t *lock)
   27.90 -{
   27.91 -#ifdef CONFIG_DEBUG_SPINLOCK
   27.92 -	BUG_ON(lock->magic != SPINLOCK_MAGIC);
   27.93 -	BUG_ON(!spin_is_locked(lock));
   27.94 -#endif
   27.95 -	__asm__ __volatile__(
   27.96 -		spin_unlock_string
   27.97 -	);
   27.98 -}
   27.99 -
  27.100 -#else
  27.101 -
  27.102 -#define spin_unlock_string \
  27.103 -	"xchgb %b0, %1" \
  27.104 -		:"=q" (oldval), "=m" (lock->lock) \
  27.105 -		:"0" (oldval) : "memory"
  27.106 -
  27.107 -static inline void _raw_spin_unlock(spinlock_t *lock)
  27.108 -{
  27.109 -	char oldval = 1;
  27.110 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.111 -	BUG_ON(lock->magic != SPINLOCK_MAGIC);
  27.112 -	BUG_ON(!spin_is_locked(lock));
  27.113 -#endif
  27.114 -	__asm__ __volatile__(
  27.115 -		spin_unlock_string
  27.116 -	);
  27.117 -}
  27.118 -
  27.119 -#endif
  27.120 -
  27.121 -static inline int _raw_spin_trylock(spinlock_t *lock)
  27.122 -{
  27.123 -	char oldval;
  27.124 -	__asm__ __volatile__(
  27.125 -		"xchgb %b0,%1"
  27.126 -		:"=q" (oldval), "=m" (lock->lock)
  27.127 -		:"0" (0) : "memory");
  27.128 -	return oldval > 0;
  27.129 -}
  27.130 -
  27.131 -static inline void _raw_spin_lock(spinlock_t *lock)
  27.132 -{
  27.133 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.134 -	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
  27.135 -		printk("eip: %p\n", __builtin_return_address(0));
  27.136 -		BUG();
  27.137 -	}
  27.138 -#endif
  27.139 -	__asm__ __volatile__(
  27.140 -		spin_lock_string
  27.141 -		:"=m" (lock->lock) : : "memory");
  27.142 -}
  27.143 -
  27.144 -static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
  27.145 -{
  27.146 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.147 -	if (unlikely(lock->magic != SPINLOCK_MAGIC)) {
  27.148 -		printk("eip: %p\n", __builtin_return_address(0));
  27.149 -		BUG();
  27.150 -	}
  27.151 -#endif
  27.152 -	__asm__ __volatile__(
  27.153 -		spin_lock_string_flags
  27.154 -		:"=m" (lock->lock) : "r" (flags) : "memory");
  27.155 -}
  27.156 -
  27.157 -/*
  27.158 - * Read-write spinlocks, allowing multiple readers
  27.159 - * but only one writer.
  27.160 - *
  27.161 - * NOTE! it is quite common to have readers in interrupts
  27.162 - * but no interrupt writers. For those circumstances we
   27.163 - * can "mix" irq-safe locks - any writer needs to get an
  27.164 - * irq-safe write-lock, but readers can get non-irqsafe
  27.165 - * read-locks.
  27.166 - */
  27.167 -typedef struct {
  27.168 -	volatile unsigned int lock;
  27.169 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.170 -	unsigned magic;
  27.171 -#endif
  27.172 -} rwlock_t;
  27.173 -
  27.174 -#define RWLOCK_MAGIC	0xdeaf1eed
  27.175 -
  27.176 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.177 -#define RWLOCK_MAGIC_INIT	, RWLOCK_MAGIC
  27.178 -#else
  27.179 -#define RWLOCK_MAGIC_INIT	/* */
  27.180 -#endif
  27.181 -
  27.182 -#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
  27.183 -
  27.184 -#define rwlock_init(x)	do { *(x) = RW_LOCK_UNLOCKED; } while(0)
  27.185 -
  27.186 -#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
  27.187 -
  27.188 -/*
  27.189 - * On x86, we implement read-write locks as a 32-bit counter
  27.190 - * with the high bit (sign) being the "contended" bit.
  27.191 - *
  27.192 - * The inline assembly is non-obvious. Think about it.
  27.193 - *
  27.194 - * Changed to use the same technique as rw semaphores.  See
  27.195 - * semaphore.h for details.  -ben
  27.196 - */
  27.197 -/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
  27.198 -
  27.199 -static inline void _raw_read_lock(rwlock_t *rw)
  27.200 -{
  27.201 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.202 -	BUG_ON(rw->magic != RWLOCK_MAGIC);
  27.203 -#endif
  27.204 -	__build_read_lock(rw, "__read_lock_failed");
  27.205 -}
  27.206 -
  27.207 -static inline void _raw_write_lock(rwlock_t *rw)
  27.208 -{
  27.209 -#ifdef CONFIG_DEBUG_SPINLOCK
  27.210 -	BUG_ON(rw->magic != RWLOCK_MAGIC);
  27.211 -#endif
  27.212 -	__build_write_lock(rw, "__write_lock_failed");
  27.213 -}
  27.214 -
  27.215 -#define _raw_read_unlock(rw)		asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
  27.216 -#define _raw_write_unlock(rw)	asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
  27.217 -
  27.218 -static inline int _raw_write_trylock(rwlock_t *lock)
  27.219 -{
  27.220 -	atomic_t *count = (atomic_t *)lock;
  27.221 -	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
  27.222 -		return 1;
  27.223 -	atomic_add(RW_LOCK_BIAS, count);
  27.224 -	return 0;
  27.225 -}
  27.226 -
  27.227 -#endif /* __ASM_SPINLOCK_H */
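
The removed spinlock.h is the classic i386 byte lock: lock == 1 means free; "lock ; decb" acquires it (a non-negative result means the caller now owns it); waiters spin with "rep;nop" on plain reads until the byte goes positive; a plain "movb $1" releases it, except on OOSTORE/PPro-errata parts, which release with xchgb instead; and trylock is an xchgb that succeeds iff the old value was positive. The rwlocks layer a biased 32-bit counter on top: readers subtract one, a writer subtracts the whole RW_LOCK_BIAS, and the sign bit flags contention. A sketch of both protocols using GCC __atomic builtins in place of the inline assembly (names are mine, not from this tree):

    /* Byte-lock sketch: lock == 1 free, lock <= 0 held/contended. */
    typedef struct { signed char lock; } byte_lock_t;
    #define BYTE_LOCK_UNLOCKED ((byte_lock_t){ 1 })

    static inline int byte_trylock(byte_lock_t *l)
    {
        /* xchgb with 0: we own the lock iff the old value was positive. */
        return __atomic_exchange_n(&l->lock, 0, __ATOMIC_ACQUIRE) > 0;
    }

    static inline void byte_lock(byte_lock_t *l)
    {
        /* lock;decb: the result is non-negative (jns) iff the
         * pre-decrement value was positive, i.e. the lock was free. */
        while (__atomic_fetch_sub(&l->lock, 1, __ATOMIC_ACQUIRE) <= 0) {
            /* cmpb/jle spin: wait with plain loads until release stores 1.
             * Like the original, the repeated failed decrements limit this
             * to fewer than 128 concurrent waiters on a signed byte. */
            while (__atomic_load_n(&l->lock, __ATOMIC_RELAXED) <= 0)
                __builtin_ia32_pause();     /* rep;nop (x86 GCC builtin) */
        }
    }

    static inline void byte_unlock(byte_lock_t *l)
    {
        /* movb $1: a plain release store is enough with x86 ordering. */
        __atomic_store_n(&l->lock, 1, __ATOMIC_RELEASE);
    }

    /* rwlock sketch: biased counter, mirroring _raw_write_trylock(). */
    #define RW_LOCK_BIAS 0x01000000

    static inline int rw_write_trylock(int *count)
    {
        /* Claim the whole bias; the result is zero only if no readers
         * and no writer were present (atomic_sub_and_test above). */
        if (__atomic_sub_fetch(count, RW_LOCK_BIAS, __ATOMIC_ACQUIRE) == 0)
            return 1;
        __atomic_add_fetch(count, RW_LOCK_BIAS, __ATOMIC_RELAXED);  /* undo */
        return 0;
    }
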