ia64/xen-unstable

changeset 2926:e8a2c91dfc4f

bitkeeper revision 1.1159.1.399 (4191f4cbUvkMenqLkHaeRu45RztOdA)

More SMP guest support.
author cl349@freefall.cl.cam.ac.uk
date Wed Nov 10 11:00:27 2004 +0000 (2004-11-10)
parents 7ed93ab784b6
children b12c5094e28c
files linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c linux-2.6.9-xen-sparse/arch/xen/i386/kernel/head.S linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6.9-xen-sparse/arch/xen/kernel/evtchn.c linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.9-xen-sparse/drivers/xen/evtchn/evtchn.c linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/system.h linux-2.6.9-xen-sparse/include/asm-xen/evtchn.h linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h tools/libxc/xc_linux_restore.c xen/arch/x86/domain.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/mm.c xen/common/domain.c xen/common/keyhandler.c xen/common/sched_bvt.c xen/common/schedule.c xen/include/asm-x86/processor.h xen/include/public/xen.h xen/include/xen/event.h xen/include/xen/sched.h
line diff
     1.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Tue Nov 09 13:14:11 2004 +0000
     1.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Wed Nov 10 11:00:27 2004 +0000
     1.3 @@ -549,17 +549,6 @@ void __init cpu_init (void)
     1.4  	}
     1.5  
     1.6  	/*
     1.7 -	 * Initialize the per-CPU GDT with the boot GDT,
     1.8 -	 * and set up the GDT descriptor:
     1.9 -	 */
    1.10 -	if (cpu) {
    1.11 -		cpu_gdt_descr[cpu].size = GDT_SIZE;
    1.12 -		cpu_gdt_descr[cpu].address = 0;	/* XXXcl alloc page */
    1.13 -		BUG();		/* XXXcl SMP */
    1.14 -		memcpy((void *)cpu_gdt_descr[cpu].address,
    1.15 -		    (void *)cpu_gdt_descr[0].address, GDT_SIZE);
    1.16 -	}
    1.17 -	/*
    1.18  	 * Set up the per-thread TLS descriptor cache:
    1.19  	 */
    1.20  	memcpy(thread->tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
     2.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/head.S	Tue Nov 09 13:14:11 2004 +0000
     2.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/head.S	Wed Nov 10 11:00:27 2004 +0000
     2.3 @@ -33,14 +33,19 @@
     2.4  ENTRY(startup_32)
     2.5  	cld
     2.6  
     2.7 -	/* Set up the stack pointer */
     2.8 -	lss stack_start,%esp
     2.9 -
    2.10  	/* Copy the necessary stuff from xen_start_info structure. */
    2.11  	mov  $xen_start_info_union,%edi
    2.12  	mov  $128,%ecx
    2.13  	rep movsl
    2.14  
    2.15 +#ifdef CONFIG_SMP
    2.16 +ENTRY(startup_32_smp)
    2.17 +	cld
    2.18 +#endif /* CONFIG_SMP */
    2.19 +
    2.20 +	/* Set up the stack pointer */
    2.21 +	lss stack_start,%esp
    2.22 +
    2.23  checkCPUtype:
    2.24  
    2.25  	/* get vendor info */
    2.26 @@ -63,11 +68,22 @@ checkCPUtype:
    2.27  	movb %cl,X86_MASK
    2.28  	movl %edx,X86_CAPABILITY
    2.29  
    2.30 +	incb ready
    2.31 +
    2.32  	xorl %eax,%eax		# Clear FS/GS and LDT
    2.33  	movl %eax,%fs
    2.34  	movl %eax,%gs
    2.35  	cld		# gcc2 wants the direction flag cleared at all times
    2.36  
    2.37 +#ifdef CONFIG_SMP
    2.38 +	movb ready, %cl	
    2.39 +	cmpb $1,%cl
    2.40 +	je 1f			# the first CPU calls start_kernel
    2.41 +				# all other CPUs call initialize_secondary
    2.42 +	call initialize_secondary
    2.43 +	jmp L6
    2.44 +1:
    2.45 +#endif /* CONFIG_SMP */
    2.46  	call start_kernel
    2.47  L6:
    2.48  	jmp L6			# main should never return here, but
    2.49 @@ -90,6 +106,8 @@ ENTRY(stack_start)
    2.50  	.long init_thread_union+THREAD_SIZE
    2.51  	.long __BOOT_DS
    2.52  
    2.53 +ready:	.byte 0
    2.54 +
    2.55  # XXXcl
    2.56  .globl idt_descr
    2.57  .globl cpu_gdt_descr
     3.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c	Tue Nov 09 13:14:11 2004 +0000
     3.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smp.c	Wed Nov 10 11:00:27 2004 +0000
     3.3 @@ -26,6 +26,8 @@
     3.4  #include <mach_apic.h>
     3.5  #endif
     3.6  
     3.7 +#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
     3.8 +
     3.9  /*
    3.10   *	Some notes on x86 processor bugs affecting SMP operation:
    3.11   *
    3.12 @@ -126,7 +128,7 @@ static inline int __prepare_ICR2 (unsign
    3.13  void __send_IPI_shortcut(unsigned int shortcut, int vector)
    3.14  {
    3.15  #if 1
    3.16 -	printk("__send_IPI_shortcut\n");
    3.17 +	xxprint("__send_IPI_shortcut\n");
    3.18  #else
    3.19  	/*
    3.20  	 * Subtle. In the case of the 'never do double writes' workaround
    3.21 @@ -165,7 +167,8 @@ void fastcall send_IPI_self(int vector)
    3.22  void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
    3.23  {
    3.24  #if 1
    3.25 -	printk("send_IPI_mask_bitmask\n");
    3.26 +	xxprint("send_IPI_mask_bitmask\n");
    3.27 +	dump_stack();
    3.28  #else
    3.29  	unsigned long mask = cpus_addr(cpumask)[0];
    3.30  	unsigned long cfg;
    3.31 @@ -201,7 +204,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
    3.32  inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
    3.33  {
    3.34  #if 1
    3.35 -	printk("send_IPI_mask_sequence\n");
    3.36 +	xxprint("send_IPI_mask_sequence\n");
    3.37  #else
    3.38  	unsigned long cfg, flags;
    3.39  	unsigned int query_cpu;
    3.40 @@ -349,7 +352,7 @@ asmlinkage void smp_invalidate_interrupt
    3.41  			leave_mm(cpu);
    3.42  	}
    3.43  #if 1
    3.44 -	printk("smp_invalidate_interrupt ack_APIC_irq\n");
    3.45 +	xxprint("smp_invalidate_interrupt ack_APIC_irq\n");
    3.46  #else
    3.47  	ack_APIC_irq();
    3.48  #endif
    3.49 @@ -405,6 +408,7 @@ static void flush_tlb_others(cpumask_t c
    3.50  	 */
    3.51  	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
    3.52  
    3.53 +	xxprint("flush_tlb_others lockup");
    3.54  	while (!cpus_empty(flush_cpumask))
    3.55  		/* nothing. lockup detection does not belong here */
    3.56  		mb();
    3.57 @@ -574,7 +578,7 @@ static void stop_this_cpu (void * dummy)
    3.58  	cpu_clear(smp_processor_id(), cpu_online_map);
    3.59  	local_irq_disable();
    3.60  #if 1
    3.61 -	printk("stop_this_cpu disable_local_APIC\n");
    3.62 +	xxprint("stop_this_cpu disable_local_APIC\n");
    3.63  #else
    3.64  	disable_local_APIC();
    3.65  #endif
    3.66 @@ -593,7 +597,7 @@ void smp_send_stop(void)
    3.67  
    3.68  	local_irq_disable();
    3.69  #if 1
    3.70 -	printk("smp_send_stop disable_local_APIC\n");
    3.71 +	xxprint("smp_send_stop disable_local_APIC\n");
    3.72  #else
    3.73  	disable_local_APIC();
    3.74  #endif
    3.75 @@ -608,7 +612,7 @@ void smp_send_stop(void)
    3.76  asmlinkage void smp_reschedule_interrupt(void)
    3.77  {
    3.78  #if 1
    3.79 -	printk("smp_reschedule_interrupt: ack_APIC_irq\n");
    3.80 +	xxprint("smp_reschedule_interrupt: ack_APIC_irq\n");
    3.81  #else
    3.82  	ack_APIC_irq();
    3.83  #endif
    3.84 @@ -621,7 +625,7 @@ asmlinkage void smp_call_function_interr
    3.85  	int wait = call_data->wait;
    3.86  
    3.87  #if 1
    3.88 -	printk("smp_call_function_interrupt: ack_APIC_irq\n");
    3.89 +	xxprint("smp_call_function_interrupt: ack_APIC_irq\n");
    3.90  #else
    3.91  	ack_APIC_irq();
    3.92  #endif
     4.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c	Tue Nov 09 13:14:11 2004 +0000
     4.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/smpboot.c	Wed Nov 10 11:00:27 2004 +0000
     4.3 @@ -51,16 +51,17 @@
     4.4  #include <asm/desc.h>
     4.5  #include <asm/arch_hooks.h>
     4.6  
     4.7 -#if 0
     4.8 +#if 1
     4.9 +#define Dprintk(args...)
    4.10 +#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
    4.11 +#else
    4.12  #include <mach_apic.h>
    4.13  #endif
    4.14  #include <mach_wakecpu.h>
    4.15  #include <smpboot_hooks.h>
    4.16  
    4.17 -#if 0
    4.18  /* Set if we find a B stepping CPU */
    4.19  static int __initdata smp_b_stepping;
    4.20 -#endif
    4.21  
    4.22  /* Number of siblings per CPU package */
    4.23  int smp_num_siblings = 1;
    4.24 @@ -113,7 +114,14 @@ static unsigned long __init setup_trampo
    4.25  void __init smp_alloc_memory(void)
    4.26  {
    4.27  #if 1
    4.28 -	printk("smp_alloc_memory\n");
    4.29 +	int cpu;
    4.30 +
    4.31 +	xxprint("smp_alloc_memory\n");
    4.32 +	for (cpu = 1; cpu < NR_CPUS; cpu++) {
    4.33 +		cpu_gdt_descr[cpu].address = (unsigned long)
    4.34 +			alloc_bootmem_low_pages(PAGE_SIZE);
    4.35 +		/* XXX free unused pages later */
    4.36 +	}
    4.37  #else
    4.38  	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
    4.39  	/*
    4.40 @@ -134,7 +142,6 @@ void __init smp_alloc_memory(void)
    4.41   * a given CPU
    4.42   */
    4.43  
    4.44 -#if 0
    4.45  static void __init smp_store_cpu_info(int id)
    4.46  {
    4.47  	struct cpuinfo_x86 *c = cpu_data + id;
    4.48 @@ -187,7 +194,6 @@ static void __init smp_store_cpu_info(in
    4.49  valid_k7:
    4.50  	;
    4.51  }
    4.52 -#endif
    4.53  
    4.54  #if 0
    4.55  /*
    4.56 @@ -328,18 +334,16 @@ static void __init synchronize_tsc_ap (v
    4.57  #undef NR_LOOPS
    4.58  
    4.59  extern void calibrate_delay(void);
    4.60 +#endif
    4.61  
    4.62  static atomic_t init_deasserted;
    4.63 -#endif
    4.64  
    4.65  void __init smp_callin(void)
    4.66  {
    4.67 -#if 1
    4.68 -	printk("smp_callin\n");
    4.69 -#else
    4.70  	int cpuid, phys_id;
    4.71  	unsigned long timeout;
    4.72  
    4.73 +#if 0
    4.74  	/*
    4.75  	 * If waken up by an INIT in an 82489DX configuration
    4.76  	 * we may get here before an INIT-deassert IPI reaches
    4.77 @@ -347,11 +351,12 @@ void __init smp_callin(void)
    4.78  	 * lock up on an APIC access.
    4.79  	 */
    4.80  	wait_for_init_deassert(&init_deasserted);
    4.81 +#endif
    4.82  
    4.83  	/*
    4.84  	 * (This works even if the APIC is not enabled.)
    4.85  	 */
    4.86 -	phys_id = GET_APIC_ID(apic_read(APIC_ID));
    4.87 +	phys_id = smp_processor_id();
    4.88  	cpuid = smp_processor_id();
    4.89  	if (cpu_isset(cpuid, cpu_callin_map)) {
    4.90  		printk("huh, phys CPU#%d, CPU#%d already present??\n",
    4.91 @@ -387,6 +392,7 @@ void __init smp_callin(void)
    4.92  		BUG();
    4.93  	}
    4.94  
    4.95 +#if 0
    4.96  	/*
    4.97  	 * the boot CPU has finished the init stage and is spinning
    4.98  	 * on callin_map until we finish. We are free to set up this
    4.99 @@ -405,6 +411,7 @@ void __init smp_callin(void)
   4.100  	 * Get our bogomips.
   4.101  	 */
   4.102  	calibrate_delay();
   4.103 +#endif
   4.104  	Dprintk("Stack at about %p\n",&cpuid);
   4.105  
   4.106  	/*
   4.107 @@ -412,13 +419,16 @@ void __init smp_callin(void)
   4.108  	 */
   4.109   	smp_store_cpu_info(cpuid);
   4.110  
   4.111 +#if 0
   4.112  	disable_APIC_timer();
   4.113  	local_irq_disable();
   4.114 +#endif
   4.115  	/*
   4.116  	 * Allow the master to continue.
   4.117  	 */
   4.118  	cpu_set(cpuid, cpu_callin_map);
   4.119  
   4.120 +#if 0
   4.121  	/*
   4.122  	 *      Synchronize the TSC with the BP
   4.123  	 */
   4.124 @@ -436,8 +446,48 @@ extern int cpu_idle(void);
   4.125   */
   4.126  int __init start_secondary(void *unused)
   4.127  {
   4.128 +	/*
   4.129 +	 * Dont put anything before smp_callin(), SMP
   4.130 +	 * booting is too fragile that we want to limit the
   4.131 +	 * things done here to the most necessary things.
   4.132 +	 */
   4.133 +	cpu_init();
   4.134 +	smp_callin();
   4.135 +	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
   4.136 +		rep_nop();
   4.137  #if 1
   4.138 -	printk("start_secondary\n");
   4.139 +	if (0) {
   4.140 +		char *msg = "start_secondary\n";
   4.141 +		char *msg2 = "delay2\n";
   4.142 +		int timeout;
   4.143 +		(void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg);
   4.144 +		for (timeout = 0; timeout < 50000; timeout++) {
   4.145 +			udelay(100);
   4.146 +			if (timeout == 20000) {
   4.147 +				(void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
   4.148 +				timeout = 0;
   4.149 +			}
   4.150 +		}
   4.151 +	}
   4.152 +	// enable_APIC_timer();
   4.153 +	/*
   4.154 +	 * low-memory mappings have been cleared, flush them from
   4.155 +	 * the local TLBs too.
   4.156 +	 */
   4.157 +	// local_flush_tlb();
   4.158 +	cpu_set(smp_processor_id(), cpu_online_map);
   4.159 +	wmb();
   4.160 +	if (10) {
   4.161 +		char *msg2 = "delay2\n";
   4.162 +		int timeout;
   4.163 +		for (timeout = 0; timeout < 50000; timeout++) {
   4.164 +			udelay(1000);
   4.165 +			if (timeout == 2000) {
   4.166 +				(void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
   4.167 +				timeout = 0;
   4.168 +			}
   4.169 +		}
   4.170 +	}
   4.171  	return cpu_idle();
   4.172  #else
   4.173  	/*
   4.174 @@ -531,7 +581,7 @@ u8 cpu_2_logical_apicid[NR_CPUS] = { [0 
   4.175  void map_cpu_to_logical_apicid(void)
   4.176  {
   4.177  #if 1
   4.178 -	printk("map_cpu_to_logical_apicid\n");
   4.179 +	xxprint("map_cpu_to_logical_apicid\n");
   4.180  #else
   4.181  	int cpu = smp_processor_id();
   4.182  	int apicid = logical_smp_processor_id();
   4.183 @@ -642,7 +692,7 @@ static int __init
   4.184  wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
   4.185  {
   4.186  #if 1
   4.187 -	printk("wakeup_secondary_cpu\n");
   4.188 +	xxprint("wakeup_secondary_cpu\n");
   4.189  	return 0;
   4.190  #else
   4.191  	unsigned long send_status = 0, accept_status = 0;
   4.192 @@ -780,7 +830,6 @@ wakeup_secondary_cpu(int phys_apicid, un
   4.193  
   4.194  extern cpumask_t cpu_initialized;
   4.195  
   4.196 -#if 0
   4.197  static int __init do_boot_cpu(int apicid)
   4.198  /*
   4.199   * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
   4.200 @@ -792,7 +841,14 @@ static int __init do_boot_cpu(int apicid
   4.201  	unsigned long boot_error;
   4.202  	int timeout, cpu;
   4.203  	unsigned long start_eip;
   4.204 +#if 0
   4.205  	unsigned short nmi_high = 0, nmi_low = 0;
   4.206 +#endif
   4.207 +	full_execution_context_t ctxt;
   4.208 +	extern void startup_32_smp(void);
   4.209 +	extern void hypervisor_callback(void);
   4.210 +	extern void failsafe_callback(void);
   4.211 +	int i;
   4.212  
   4.213  	cpu = ++cpucount;
   4.214  	/*
   4.215 @@ -804,7 +860,7 @@ static int __init do_boot_cpu(int apicid
   4.216  		panic("failed fork for CPU %d", cpu);
   4.217  	idle->thread.eip = (unsigned long) start_secondary;
   4.218  	/* start_eip had better be page-aligned! */
   4.219 -	start_eip = setup_trampoline();
   4.220 +	start_eip = (unsigned long)startup_32_smp;
   4.221  
   4.222  	/* So we see what's up   */
   4.223  	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
   4.224 @@ -820,6 +876,107 @@ static int __init do_boot_cpu(int apicid
   4.225  
   4.226  	atomic_set(&init_deasserted, 0);
   4.227  
   4.228 +#if 1
   4.229 +	if (cpu_gdt_descr[0].size > PAGE_SIZE)
   4.230 +		BUG();
   4.231 +	cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
   4.232 +	memcpy((void *)cpu_gdt_descr[cpu].address,
   4.233 +	       (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
   4.234 +		memset((char *)cpu_gdt_descr[cpu].address +
   4.235 +		       FIRST_RESERVED_GDT_ENTRY * 8, 0,
   4.236 +		       NR_RESERVED_GDT_ENTRIES * 8);
   4.237 +
   4.238 +	memset(&ctxt, 0, sizeof(ctxt));
   4.239 +
   4.240 +	ctxt.cpu_ctxt.ds = __USER_DS;
   4.241 +	ctxt.cpu_ctxt.es = __USER_DS;
   4.242 +	ctxt.cpu_ctxt.fs = 0;
   4.243 +	ctxt.cpu_ctxt.gs = 0;
   4.244 +	ctxt.cpu_ctxt.ss = __KERNEL_DS;
   4.245 +	ctxt.cpu_ctxt.cs = __KERNEL_CS;
   4.246 +	ctxt.cpu_ctxt.eip = start_eip;
   4.247 +	ctxt.cpu_ctxt.esp = idle->thread.esp;
   4.248 +	ctxt.cpu_ctxt.eflags = (1<<9) | (1<<2);
   4.249 +
   4.250 +	/* FPU is set up to default initial state. */
   4.251 +	memset(ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
   4.252 +
   4.253 +	/* Virtual IDT is empty at start-of-day. */
   4.254 +	for ( i = 0; i < 256; i++ )
   4.255 +	{
   4.256 +		ctxt.trap_ctxt[i].vector = i;
   4.257 +		ctxt.trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
   4.258 +	}
   4.259 +	ctxt.fast_trap_idx = 0;
   4.260 +
   4.261 +	/* No LDT. */
   4.262 +	ctxt.ldt_ents = 0;
   4.263 +
   4.264 +	{
   4.265 +		unsigned long va;
   4.266 +		int f;
   4.267 +
   4.268 +		for (va = cpu_gdt_descr[cpu].address, f = 0;
   4.269 +		     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
   4.270 +		     va += PAGE_SIZE, f++) {
   4.271 +			ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
   4.272 +			protect_page(swapper_pg_dir, (void *)va, PROT_ON);
   4.273 +		}
   4.274 +		ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
   4.275 +		flush_page_update_queue();
   4.276 +	}
   4.277 +
   4.278 +	/* Ring 1 stack is the initial stack. */
   4.279 +	ctxt.guestos_ss  = __KERNEL_DS;
   4.280 +	ctxt.guestos_esp = idle->thread.esp;
   4.281 +
   4.282 +	/* Callback handlers. */
   4.283 +	ctxt.event_callback_cs     = __KERNEL_CS;
   4.284 +	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
   4.285 +	ctxt.failsafe_callback_cs  = __KERNEL_CS;
   4.286 +	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
   4.287 +
   4.288 +	ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
   4.289 +
   4.290 +	boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
   4.291 +
   4.292 +	if (!boot_error) {
   4.293 +		/*
   4.294 +		 * allow APs to start initializing.
   4.295 +		 */
   4.296 +		Dprintk("Before Callout %d.\n", cpu);
   4.297 +		cpu_set(cpu, cpu_callout_map);
   4.298 +		Dprintk("After Callout %d.\n", cpu);
   4.299 +
   4.300 +		/*
   4.301 +		 * Wait 5s total for a response
   4.302 +		 */
   4.303 +		for (timeout = 0; timeout < 50000; timeout++) {
   4.304 +			if (cpu_isset(cpu, cpu_callin_map))
   4.305 +				break;	/* It has booted */
   4.306 +			udelay(100);
   4.307 +		}
   4.308 +
   4.309 +		if (cpu_isset(cpu, cpu_callin_map)) {
   4.310 +			/* number CPUs logically, starting from 1 (BSP is 0) */
   4.311 +			Dprintk("OK.\n");
   4.312 +			printk("CPU%d: ", cpu);
   4.313 +			print_cpu_info(&cpu_data[cpu]);
   4.314 +			Dprintk("CPU has booted.\n");
   4.315 +		} else {
   4.316 +			boot_error= 1;
   4.317 +		}
   4.318 +	}
   4.319 +	x86_cpu_to_apicid[cpu] = apicid;
   4.320 +	if (boot_error) {
   4.321 +		/* Try to put things back the way they were before ... */
   4.322 +		// unmap_cpu_to_logical_apicid(cpu);
   4.323 +		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
   4.324 +		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
   4.325 +		cpucount--;
   4.326 +	}
   4.327 +
   4.328 +#else
   4.329  	Dprintk("Setting warm reset code and vector.\n");
   4.330  
   4.331  	store_NMI_vector(&nmi_high, &nmi_low);
   4.332 @@ -877,14 +1034,13 @@ static int __init do_boot_cpu(int apicid
   4.333  
   4.334  	/* mark "stuck" area as not stuck */
   4.335  	*((volatile unsigned long *)trampoline_base) = 0;
   4.336 +#endif
   4.337  
   4.338  	return boot_error;
   4.339  }
   4.340  
   4.341  cycles_t cacheflush_time;
   4.342 -#endif
   4.343  unsigned long cache_decay_ticks;
   4.344 -#if 0
   4.345  
   4.346  static void smp_tune_scheduling (void)
   4.347  {
   4.348 @@ -931,6 +1087,7 @@ static void smp_tune_scheduling (void)
   4.349   * Cycle through the processors sending APIC IPIs to boot each.
   4.350   */
   4.351  
   4.352 +#if 0
   4.353  static int boot_cpu_logical_apicid;
   4.354  #endif
   4.355  /* Where the IO area was mapped on multiquad, always 0 otherwise */
   4.356 @@ -940,11 +1097,11 @@ cpumask_t cpu_sibling_map[NR_CPUS] __cac
   4.357  
   4.358  static void __init smp_boot_cpus(unsigned int max_cpus)
   4.359  {
   4.360 -#if 1
   4.361 -	printk("smp_boot_cpus %d\n", max_cpus);
   4.362 -#else
   4.363 -	int apicid, cpu, bit, kicked;
   4.364 +	int cpu, kicked;
   4.365  	unsigned long bogosum = 0;
   4.366 +#if 0
   4.367 +	int apicid, bit;
   4.368 +#endif
   4.369  
   4.370  	/*
   4.371  	 * Setup boot CPU information
   4.372 @@ -953,9 +1110,15 @@ static void __init smp_boot_cpus(unsigne
   4.373  	printk("CPU%d: ", 0);
   4.374  	print_cpu_info(&cpu_data[0]);
   4.375  
   4.376 +#if 0
   4.377  	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
   4.378  	boot_cpu_logical_apicid = logical_smp_processor_id();
   4.379  	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
   4.380 +#else
   4.381 +	// boot_cpu_physical_apicid = 0;
   4.382 +	// boot_cpu_logical_apicid = 0;
   4.383 +	x86_cpu_to_apicid[0] = 0;
   4.384 +#endif
   4.385  
   4.386  	current_thread_info()->cpu = 0;
   4.387  	smp_tune_scheduling();
   4.388 @@ -966,17 +1129,20 @@ static void __init smp_boot_cpus(unsigne
   4.389  	 * If we couldn't find an SMP configuration at boot time,
   4.390  	 * get out of here now!
   4.391  	 */
   4.392 -	if (!smp_found_config && !acpi_lapic) {
   4.393 +	if (!smp_found_config /* && !acpi_lapic) */) {
   4.394  		printk(KERN_NOTICE "SMP motherboard not detected.\n");
   4.395  		smpboot_clear_io_apic_irqs();
   4.396 +#if 0
   4.397  		phys_cpu_present_map = physid_mask_of_physid(0);
   4.398  		if (APIC_init_uniprocessor())
   4.399  			printk(KERN_NOTICE "Local APIC not detected."
   4.400  					   " Using dummy APIC emulation.\n");
   4.401 +#endif
   4.402  		map_cpu_to_logical_apicid();
   4.403  		return;
   4.404  	}
   4.405  
   4.406 +#if 0
   4.407  	/*
   4.408  	 * Should not be necessary because the MP table should list the boot
   4.409  	 * CPU too, but we do it for the sake of robustness anyway.
   4.410 @@ -1001,18 +1167,22 @@ static void __init smp_boot_cpus(unsigne
   4.411  	}
   4.412  
   4.413  	verify_local_APIC();
   4.414 +#endif
   4.415  
   4.416  	/*
   4.417  	 * If SMP should be disabled, then really disable it!
   4.418  	 */
   4.419  	if (!max_cpus) {
   4.420 -		smp_found_config = 0;
   4.421 +		HYPERVISOR_shared_info->n_vcpu = 1;
   4.422  		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
   4.423  		smpboot_clear_io_apic_irqs();
   4.424 +#if 0
   4.425  		phys_cpu_present_map = physid_mask_of_physid(0);
   4.426 +#endif
   4.427  		return;
   4.428  	}
   4.429  
   4.430 +#if 0
   4.431  	connect_bsp_APIC();
   4.432  	setup_local_APIC();
   4.433  	map_cpu_to_logical_apicid();
   4.434 @@ -1028,32 +1198,29 @@ static void __init smp_boot_cpus(unsigne
   4.435  	 * clustered apic ID.
   4.436  	 */
   4.437  	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
   4.438 +#endif
   4.439 +	Dprintk("CPU present map: %lx\n",
   4.440 +		(1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
   4.441  
   4.442  	kicked = 1;
   4.443 -	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
   4.444 -		apicid = cpu_present_to_apicid(bit);
   4.445 -		/*
   4.446 -		 * Don't even attempt to start the boot CPU!
   4.447 -		 */
   4.448 -		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
   4.449 -			continue;
   4.450 -
   4.451 -		if (!check_apicid_present(bit))
   4.452 -			continue;
   4.453 +	for (cpu = 1; kicked < NR_CPUS &&
   4.454 +		     cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
   4.455  		if (max_cpus <= cpucount+1)
   4.456  			continue;
   4.457  
   4.458 -		if (do_boot_cpu(apicid))
   4.459 +		if (do_boot_cpu(cpu))
   4.460  			printk("CPU #%d not responding - cannot use it.\n",
   4.461 -								apicid);
   4.462 +								cpu);
   4.463  		else
   4.464  			++kicked;
   4.465  	}
   4.466  
   4.467 +#if 0
   4.468  	/*
   4.469  	 * Cleanup possible dangling ends...
   4.470  	 */
   4.471  	smpboot_restore_warm_reset_vector();
   4.472 +#endif
   4.473  
   4.474  	/*
   4.475  	 * Allow the user to impress friends.
   4.476 @@ -1117,6 +1284,7 @@ static void __init smp_boot_cpus(unsigne
   4.477  			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
   4.478  	}
   4.479  
   4.480 +#if 0
   4.481  	if (nmi_watchdog == NMI_LOCAL_APIC)
   4.482  		check_nmi_watchdog();
   4.483  
   4.484 @@ -1130,6 +1298,7 @@ static void __init smp_boot_cpus(unsigne
   4.485  	if (cpu_has_tsc && cpucount && cpu_khz)
   4.486  		synchronize_tsc_bp();
   4.487  #endif
   4.488 +	xxprint("smp_boot_cpus done\n");
   4.489  }
   4.490  
   4.491  /* These are wrappers to interface to the new boot process.  Someone
   4.492 @@ -1147,6 +1316,7 @@ void __devinit smp_prepare_boot_cpu(void
   4.493  
   4.494  int __devinit __cpu_up(unsigned int cpu)
   4.495  {
   4.496 +	xxprint("__cpu_up\n");
   4.497  	/* This only works at boot for x86.  See "rewrite" above. */
   4.498  	if (cpu_isset(cpu, smp_commenced_mask)) {
   4.499  		local_irq_enable();
   4.500 @@ -1164,13 +1334,13 @@ int __devinit __cpu_up(unsigned int cpu)
   4.501  	cpu_set(cpu, smp_commenced_mask);
   4.502  	while (!cpu_isset(cpu, cpu_online_map))
   4.503  		mb();
   4.504 +	xxprint("__cpu_up ok\n");
   4.505  	return 0;
   4.506  }
   4.507  
   4.508  void __init smp_cpus_done(unsigned int max_cpus)
   4.509  {
   4.510  #if 1
   4.511 -	printk("smp_cpus_done %d\n", max_cpus);
   4.512  #else
   4.513  #ifdef CONFIG_X86_IO_APIC
   4.514  	setup_ioapic_dest();
   4.515 @@ -1186,7 +1356,7 @@ void __init smp_cpus_done(unsigned int m
   4.516  void __init smp_intr_init(void)
   4.517  {
   4.518  #if 1
   4.519 -	printk("smp_intr_init\n");
   4.520 +	xxprint("smp_intr_init\n");
   4.521  #else
   4.522  	/*
   4.523  	 * IRQ0 must be given a fixed assignment and initialized,
     5.1 --- a/linux-2.6.9-xen-sparse/arch/xen/kernel/evtchn.c	Tue Nov 09 13:14:11 2004 +0000
     5.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/evtchn.c	Wed Nov 10 11:00:27 2004 +0000
     5.3 @@ -42,6 +42,8 @@
     5.4  #include <asm-xen/xen-public/physdev.h>
     5.5  #include <asm-xen/ctrl_if.h>
     5.6  #include <asm-xen/hypervisor.h>
     5.7 +#define XEN_EVTCHN_MASK_OPS
     5.8 +#include <asm-xen/evtchn.h>
     5.9  
    5.10  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    5.11  EXPORT_SYMBOL(force_evtchn_callback);
    5.12 @@ -89,14 +91,15 @@ void evtchn_do_upcall(struct pt_regs *re
    5.13      int            irq;
    5.14      unsigned long  flags;
    5.15      shared_info_t *s = HYPERVISOR_shared_info;
    5.16 +    vcpu_info_t   *vcpu_info = &s->vcpu_data[smp_processor_id()];
    5.17  
    5.18      local_irq_save(flags);
    5.19      
    5.20 -    while ( s->vcpu_data[0].evtchn_upcall_pending )
    5.21 +    while ( vcpu_info->evtchn_upcall_pending )
    5.22      {
    5.23 -        s->vcpu_data[0].evtchn_upcall_pending = 0;
    5.24 +        vcpu_info->evtchn_upcall_pending = 0;
    5.25          /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
    5.26 -        l1 = xchg(&s->evtchn_pending_sel, 0);
    5.27 +        l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
    5.28          while ( (l1i = ffs(l1)) != 0 )
    5.29          {
    5.30              l1i--;
    5.31 @@ -411,6 +414,22 @@ static struct irqaction misdirect_action
    5.32      NULL
    5.33  };
    5.34  
    5.35 +static irqreturn_t xen_dbg(int irq, void *dev_id, struct pt_regs *regs)
    5.36 +{
    5.37 +     char *msg = "debug\n";
    5.38 +     (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg);
    5.39 +     return IRQ_HANDLED;
    5.40 +}
    5.41 +
    5.42 +static struct irqaction xen_action = {
    5.43 +    xen_dbg, 
    5.44 +    SA_INTERRUPT, 
    5.45 +    CPU_MASK_CPU0, 
    5.46 +    "xen-dbg", 
    5.47 +    NULL, 
    5.48 +    NULL
    5.49 +};
    5.50 +
    5.51  void irq_suspend(void)
    5.52  {
    5.53      int pirq, virq, irq, evtchn;
    5.54 @@ -508,6 +527,9 @@ void __init init_IRQ(void)
    5.55  
    5.56      (void)setup_irq(bind_virq_to_irq(VIRQ_MISDIRECT), &misdirect_action);
    5.57  
    5.58 +    printk("debug_int\n");
    5.59 +	(void)setup_irq(bind_virq_to_irq(VIRQ_DEBUG), &xen_action);
    5.60 +
    5.61      /* This needs to be done early, but after the IRQ subsystem is alive. */
    5.62      ctrl_if_init();
    5.63  }
     6.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c	Tue Nov 09 13:14:11 2004 +0000
     6.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/blkback/blkback.c	Wed Nov 10 11:00:27 2004 +0000
     6.3 @@ -11,6 +11,7 @@
     6.4   */
     6.5  
     6.6  #include "common.h"
     6.7 +#include <asm-xen/evtchn.h>
     6.8  
     6.9  /*
    6.10   * These are rather arbitrary. They are fairly large because adjacent requests
     7.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c	Tue Nov 09 13:14:11 2004 +0000
     7.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c	Wed Nov 10 11:00:27 2004 +0000
     7.3 @@ -44,6 +44,7 @@
     7.4  #include <linux/interrupt.h>
     7.5  #include <scsi/scsi.h>
     7.6  #include <asm-xen/ctrl_if.h>
     7.7 +#include <asm-xen/evtchn.h>
     7.8  
     7.9  typedef unsigned char byte; /* from linux/ide.h */
    7.10  
     8.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/evtchn/evtchn.c	Tue Nov 09 13:14:11 2004 +0000
     8.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/evtchn/evtchn.c	Wed Nov 10 11:00:27 2004 +0000
     8.3 @@ -43,6 +43,7 @@
     8.4  #include <linux/poll.h>
     8.5  #include <linux/irq.h>
     8.6  #include <linux/init.h>
     8.7 +#define XEN_EVTCHN_MASK_OPS
     8.8  #include <asm-xen/evtchn.h>
     8.9  
    8.10  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
     9.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c	Tue Nov 09 13:14:11 2004 +0000
     9.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c	Wed Nov 10 11:00:27 2004 +0000
     9.3 @@ -11,6 +11,7 @@
     9.4   */
     9.5  
     9.6  #include "common.h"
     9.7 +#include <asm-xen/evtchn.h>
     9.8  
     9.9  static void netif_page_release(struct page *page);
    9.10  static void netif_skb_release(struct sk_buff *skb);
    10.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h	Tue Nov 09 13:14:11 2004 +0000
    10.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h	Wed Nov 10 11:00:27 2004 +0000
    10.3 @@ -54,3 +54,6 @@ static inline void smpboot_setup_io_apic
    10.4  		setup_IO_APIC();
    10.5  #endif
    10.6  }
    10.7 +
    10.8 +
    10.9 +#define	smp_found_config	(HYPERVISOR_shared_info->n_vcpu > 1)
    11.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/system.h	Tue Nov 09 13:14:11 2004 +0000
    11.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/system.h	Wed Nov 10 11:00:27 2004 +0000
    11.3 @@ -8,7 +8,6 @@
    11.4  #include <asm/segment.h>
    11.5  #include <asm/cpufeature.h>
    11.6  #include <asm-xen/hypervisor.h>
    11.7 -#include <asm-xen/evtchn.h>
    11.8  
    11.9  #ifdef __KERNEL__
   11.10  
    12.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/evtchn.h	Tue Nov 09 13:14:11 2004 +0000
    12.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/evtchn.h	Wed Nov 10 11:00:27 2004 +0000
    12.3 @@ -36,20 +36,20 @@
    12.4  #include <asm/ptrace.h>
    12.5  #include <asm/synch_bitops.h>
    12.6  #include <asm-xen/xen-public/event_channel.h>
    12.7 +#include <linux/smp.h>
    12.8  
    12.9  /*
   12.10   * LOW-LEVEL DEFINITIONS
   12.11   */
   12.12  
   12.13 -/* Force a proper event-channel callback from Xen. */
   12.14 -void force_evtchn_callback(void);
   12.15 -
   12.16  /* Entry point for notifications into Linux subsystems. */
   12.17  void evtchn_do_upcall(struct pt_regs *regs);
   12.18  
   12.19  /* Entry point for notifications into the userland character device. */
   12.20  void evtchn_device_upcall(int port);
   12.21  
   12.22 +#ifdef XEN_EVTCHN_MASK_OPS
   12.23 +
   12.24  static inline void mask_evtchn(int port)
   12.25  {
   12.26      shared_info_t *s = HYPERVISOR_shared_info;
   12.27 @@ -59,6 +59,7 @@ static inline void mask_evtchn(int port)
   12.28  static inline void unmask_evtchn(int port)
   12.29  {
   12.30      shared_info_t *s = HYPERVISOR_shared_info;
   12.31 +    vcpu_info_t *vcpu_info = &s->vcpu_data[smp_processor_id()];
   12.32  
   12.33      synch_clear_bit(port, &s->evtchn_mask[0]);
   12.34  
   12.35 @@ -67,14 +68,16 @@ static inline void unmask_evtchn(int por
   12.36       * a real IO-APIC we 'lose the interrupt edge' if the channel is masked.
   12.37       */
   12.38      if (  synch_test_bit        (port,    &s->evtchn_pending[0]) && 
   12.39 -         !synch_test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
   12.40 +         !synch_test_and_set_bit(port>>5, &vcpu_info->evtchn_pending_sel) )
   12.41      {
   12.42 -        s->vcpu_data[0].evtchn_upcall_pending = 1;
   12.43 -        if ( !s->vcpu_data[0].evtchn_upcall_mask )
   12.44 +        vcpu_info->evtchn_upcall_pending = 1;
   12.45 +        if ( !vcpu_info->evtchn_upcall_mask )
   12.46              force_evtchn_callback();
   12.47      }
   12.48  }
   12.49  
   12.50 +#endif /* XEN_EVTCHN_MASK_OPS */
   12.51 +
   12.52  static inline void clear_evtchn(int port)
   12.53  {
   12.54      shared_info_t *s = HYPERVISOR_shared_info;
    13.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h	Tue Nov 09 13:14:11 2004 +0000
    13.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h	Wed Nov 10 11:00:27 2004 +0000
    13.3 @@ -48,6 +48,10 @@ union xen_start_info_union
    13.4  extern union xen_start_info_union xen_start_info_union;
    13.5  #define xen_start_info (xen_start_info_union.xen_start_info)
    13.6  
    13.7 +/* arch/xen/kernel/evtchn.c */
    13.8 +/* Force a proper event-channel callback from Xen. */
    13.9 +void force_evtchn_callback(void);
   13.10 +
   13.11  /* arch/xen/kernel/process.c */
   13.12  void xen_cpu_idle (void);
   13.13  
   13.14 @@ -647,4 +651,20 @@ HYPERVISOR_vm_assist(
   13.15      return ret;
   13.16  }
   13.17  
   13.18 +static inline int
   13.19 +HYPERVISOR_boot_vcpu(
   13.20 +    unsigned long vcpu, full_execution_context_t *ctxt)
   13.21 +{
   13.22 +    int ret;
   13.23 +    unsigned long ign1, ign2;
   13.24 +
   13.25 +    __asm__ __volatile__ (
   13.26 +        TRAP_INSTR
   13.27 +        : "=a" (ret), "=b" (ign1), "=c" (ign2)
   13.28 +	: "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt)
   13.29 +	: "memory");
   13.30 +
   13.31 +    return ret;
   13.32 +}
   13.33 +
   13.34  #endif /* __HYPERVISOR_H__ */
    14.1 --- a/tools/libxc/xc_linux_restore.c	Tue Nov 09 13:14:11 2004 +0000
    14.2 +++ b/tools/libxc/xc_linux_restore.c	Wed Nov 10 11:00:27 2004 +0000
    14.3 @@ -94,7 +94,8 @@ int xc_linux_restore(int xc_handle, XcIO
    14.4  
    14.5      /* The new domain's shared-info frame number. */
    14.6      unsigned long shared_info_frame;
    14.7 -    unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */
    14.8 +    unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
    14.9 +    shared_info_t *shared_info = (shared_info_t *)shared_info_page;
   14.10      
   14.11      /* A copy of the CPU context of the guest. */
   14.12      full_execution_context_t ctxt;
   14.13 @@ -525,8 +526,8 @@ int xc_linux_restore(int xc_handle, XcIO
   14.14  	}	
   14.15      }
   14.16  
   14.17 -    if ( xcio_read(ioctxt, &ctxt,       sizeof(ctxt)) ||
   14.18 -         xcio_read(ioctxt, shared_info, PAGE_SIZE) )
   14.19 +    if ( xcio_read(ioctxt, &ctxt,            sizeof(ctxt)) ||
   14.20 +         xcio_read(ioctxt, shared_info_page, PAGE_SIZE) )
   14.21      {
   14.22          xcio_error(ioctxt, "Error when reading from state file");
   14.23          goto out;
   14.24 @@ -577,9 +578,10 @@ int xc_linux_restore(int xc_handle, XcIO
   14.25      ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT;
   14.26  
   14.27      /* clear any pending events and the selector */
   14.28 -    memset(&(((shared_info_t *)shared_info)->evtchn_pending[0]),
   14.29 -           0, sizeof (((shared_info_t *)shared_info)->evtchn_pending)+
   14.30 -           sizeof(((shared_info_t *)shared_info)->evtchn_pending_sel));
   14.31 +    memset(&(shared_info->evtchn_pending[0]), 0,
   14.32 +	   sizeof (shared_info->evtchn_pending));
   14.33 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   14.34 +        shared_info->vcpu_data[i].evtchn_pending_sel = 0;
   14.35  
   14.36      /* Copy saved contents of shared-info page. No checking needed. */
   14.37      ppage = xc_map_foreign_range(
    15.1 --- a/xen/arch/x86/domain.c	Tue Nov 09 13:14:11 2004 +0000
    15.2 +++ b/xen/arch/x86/domain.c	Wed Nov 10 11:00:27 2004 +0000
    15.3 @@ -550,7 +550,8 @@ void domain_relinquish_memory(struct dom
    15.4       * Relinquish GDT mappings. No need for explicit unmapping of the LDT as 
    15.5       * it automatically gets squashed when the guest's mappings go away.
    15.6       */
    15.7 -    destroy_gdt(d);
    15.8 +    for_each_exec_domain(d, ed)
    15.9 +        destroy_gdt(ed);
   15.10  
   15.11      /* Relinquish every page of memory. */
   15.12      relinquish_list(d, &d->xenpage_list);
    16.1 --- a/xen/arch/x86/x86_32/entry.S	Tue Nov 09 13:14:11 2004 +0000
    16.2 +++ b/xen/arch/x86/x86_32/entry.S	Wed Nov 10 11:00:27 2004 +0000
    16.3 @@ -621,6 +621,7 @@ ENTRY(hypercall_table)
    16.4          .long SYMBOL_NAME(do_grant_table_op)     /* 20 */
    16.5          .long SYMBOL_NAME(do_vm_assist)
    16.6          .long SYMBOL_NAME(do_update_va_mapping_otherdomain)
    16.7 +        .long SYMBOL_NAME(do_boot_vcpu)
    16.8          .rept NR_hypercalls-((.-hypercall_table)/4)
    16.9          .long SYMBOL_NAME(do_ni_hypercall)
   16.10          .endr
    17.1 --- a/xen/arch/x86/x86_32/mm.c	Tue Nov 09 13:14:11 2004 +0000
    17.2 +++ b/xen/arch/x86/x86_32/mm.c	Wed Nov 10 11:00:27 2004 +0000
    17.3 @@ -240,19 +240,16 @@ int check_descriptor(unsigned long *d)
    17.4  }
    17.5  
    17.6  
    17.7 -void destroy_gdt(struct domain *d)
    17.8 +void destroy_gdt(struct exec_domain *ed)
    17.9  {
   17.10 -    struct exec_domain *ed;
   17.11      int i;
   17.12      unsigned long pfn;
   17.13  
   17.14 -    for_each_exec_domain(d, ed) {
   17.15 -        for ( i = 0; i < 16; i++ )
   17.16 -        {
   17.17 -            if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_pt[i])) != 0 )
   17.18 -                put_page_and_type(&frame_table[pfn]);
   17.19 -            ed->mm.perdomain_pt[i] = mk_l1_pgentry(0);
   17.20 -        }
   17.21 +    for ( i = 0; i < 16; i++ )
   17.22 +    {
   17.23 +        if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_pt[i])) != 0 )
   17.24 +            put_page_and_type(&frame_table[pfn]);
   17.25 +        ed->mm.perdomain_pt[i] = mk_l1_pgentry(0);
   17.26      }
   17.27  }
   17.28  
   17.29 @@ -302,7 +299,7 @@ long set_gdt(struct exec_domain *ed,
   17.30      unmap_domain_mem(vgdt);
   17.31  
   17.32      /* Tear down the old GDT. */
   17.33 -    destroy_gdt(d);
   17.34 +    destroy_gdt(ed);
   17.35  
   17.36      /* Install the new GDT. */
   17.37      for ( i = 0; i < nr_pages; i++ )
    18.1 --- a/xen/common/domain.c	Tue Nov 09 13:14:11 2004 +0000
    18.2 +++ b/xen/common/domain.c	Wed Nov 10 11:00:27 2004 +0000
    18.3 @@ -7,6 +7,7 @@
    18.4  #include <xen/config.h>
    18.5  #include <xen/init.h>
    18.6  #include <xen/lib.h>
    18.7 +#include <xen/sched.h>
    18.8  #include <xen/errno.h>
    18.9  #include <xen/sched.h>
   18.10  #include <xen/mm.h>
   18.11 @@ -62,7 +63,7 @@ struct domain *do_createdomain(domid_t d
   18.12  
   18.13          arch_do_createdomain(ed);
   18.14  
   18.15 -        sched_add_domain(d);
   18.16 +        sched_add_domain(ed);
   18.17  
   18.18          write_lock(&domlist_lock);
   18.19          pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
   18.20 @@ -77,7 +78,7 @@ struct domain *do_createdomain(domid_t d
   18.21      }
   18.22      else
   18.23      {
   18.24 -        sched_add_domain(d);
   18.25 +        sched_add_domain(ed);
   18.26      }
   18.27  
   18.28      return d;
   18.29 @@ -288,6 +289,76 @@ int final_setup_guestos(struct domain *p
   18.30      return rc;
   18.31  }
   18.32  
   18.33 +extern xmem_cache_t *exec_domain_struct_cachep;
   18.34 +
     18.35 +/*
     18.36 + * do_boot_vcpu is the hypercall handler used to allocate, initialise and
     18.37 + * launch a secondary virtual CPU (vcpu > 0) within the calling domain,
     18.38 + * using the execution context supplied by the guest.
     18.39 + */
   18.40 +long do_boot_vcpu(unsigned long vcpu, full_execution_context_t *ctxt) 
   18.41 +{
   18.42 +    struct domain *d = current->domain;
   18.43 +    struct exec_domain *ed;
   18.44 +    int rc = 0;
   18.45 +    full_execution_context_t *c;
   18.46 +
   18.47 +    if ( d->exec_domain[vcpu] != NULL )
   18.48 +        return EINVAL;
   18.49 +
   18.50 +    if ( alloc_exec_domain_struct(d, vcpu) == NULL )
   18.51 +        return -ENOMEM;
   18.52 +
   18.53 +    if ( (c = xmalloc(sizeof(*c))) == NULL )
   18.54 +    {
   18.55 +        rc = -ENOMEM;
   18.56 +        goto out;
   18.57 +    }
   18.58 +
   18.59 +    if ( copy_from_user(c, ctxt, sizeof(*c)) )
   18.60 +    {
   18.61 +        rc = -EFAULT;
   18.62 +        goto out;
   18.63 +    }
   18.64 +
   18.65 +    printk("do_boot_vcpu for dom %d vcpu %d\n", d->id, vcpu);
   18.66 +
   18.67 +    ed = d->exec_domain[vcpu];
   18.68 +
   18.69 +    atomic_set(&ed->pausecnt, 0);
   18.70 +    shadow_lock_init(ed);
   18.71 +
   18.72 +    memcpy(&ed->thread, &idle0_exec_domain.thread, sizeof(ed->thread));
   18.73 +
   18.74 +    /* arch_do_createdomain */
   18.75 +    ed->mm.perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
   18.76 +    memset(ed->mm.perdomain_pt, 0, PAGE_SIZE);
   18.77 +    machine_to_phys_mapping[virt_to_phys(ed->mm.perdomain_pt) >> 
   18.78 +                           PAGE_SHIFT] = INVALID_P2M_ENTRY;
   18.79 +
   18.80 +    sched_add_domain(ed);
   18.81 +
   18.82 +    if ( (rc = arch_final_setup_guestos(ed, c)) != 0 )
   18.83 +        goto out;
   18.84 +
   18.85 +    /* Set up the shared info structure. */
   18.86 +    update_dom_time(d);
   18.87 +
   18.88 +    /* domain_unpause_by_systemcontroller */
   18.89 +    if ( test_and_clear_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
   18.90 +        domain_wake(ed);
   18.91 +
   18.92 +    xfree(c);
   18.93 +    return 0;
   18.94 +
   18.95 + out:
   18.96 +    if ( c != NULL )
   18.97 +        xfree(c);
   18.98 +    xmem_cache_free(exec_domain_struct_cachep, d->exec_domain[vcpu]);
   18.99 +    d->exec_domain[vcpu] = NULL;
  18.100 +    return rc;
  18.101 +}
  18.102 +
  18.103  long vm_assist(struct domain *p, unsigned int cmd, unsigned int type)
  18.104  {
  18.105      if ( type > MAX_VMASST_TYPE )
    19.1 --- a/xen/common/keyhandler.c	Tue Nov 09 13:14:11 2004 +0000
    19.2 +++ b/xen/common/keyhandler.c	Wed Nov 10 11:00:27 2004 +0000
    19.3 @@ -100,15 +100,21 @@ void do_task_queues(unsigned char key)
    19.4                 page->u.inuse.type_info);
    19.5                 
    19.6          for_each_exec_domain ( d, ed ) {
    19.7 -            printk("Guest: CPU %d [has=%c] flags=%lx "
    19.8 -                   "upcall_pend = %02x, upcall_mask = %02x\n",
    19.9 +            printk("Guest: %p CPU %d [has=%c] flags=%lx "
   19.10 +                   "upcall_pend = %02x, upcall_mask = %02x\n", ed,
   19.11                     ed->processor,
   19.12                     test_bit(EDF_RUNNING, &ed->ed_flags) ? 'T':'F',
   19.13                     ed->ed_flags,
   19.14                     ed->vcpu_info->evtchn_upcall_pending, 
   19.15                     ed->vcpu_info->evtchn_upcall_mask);
   19.16          }
   19.17 -        printk("Notifying guest...\n"); 
   19.18 +        ed = d->exec_domain[0];
   19.19 +        printk("Notifying guest... %d/%d\n", d->id, ed->eid); 
   19.20 +        printk("port %d/%d stat %d %d %d\n",
   19.21 +               VIRQ_DEBUG, d->virq_to_evtchn[VIRQ_DEBUG],
   19.22 +               test_bit(d->virq_to_evtchn[VIRQ_DEBUG], &d->shared_info->evtchn_pending[0]),
   19.23 +               test_bit(d->virq_to_evtchn[VIRQ_DEBUG], &d->shared_info->evtchn_mask[0]),
   19.24 +               test_bit(d->virq_to_evtchn[VIRQ_DEBUG]>>5, &ed->vcpu_info->evtchn_pending_sel));
   19.25          send_guest_virq(d->exec_domain[0], VIRQ_DEBUG);
   19.26      }
   19.27  
    20.1 --- a/xen/common/sched_bvt.c	Tue Nov 09 13:14:11 2004 +0000
    20.2 +++ b/xen/common/sched_bvt.c	Wed Nov 10 11:00:27 2004 +0000
    20.3 @@ -179,6 +179,7 @@ int bvt_alloc_task(struct exec_domain *e
    20.4      }
    20.5      ed->ed_sched_priv = &BVT_INFO(d)->ed_inf[ed->eid];
    20.6      BVT_INFO(d)->ed_inf[ed->eid].inf = BVT_INFO(d);
    20.7 +    BVT_INFO(d)->ed_inf[ed->eid].exec_domain = ed;
    20.8      return 0;
    20.9  }
   20.10  
   20.11 @@ -192,25 +193,28 @@ void bvt_add_task(struct exec_domain *d)
   20.12      ASSERT(inf != NULL);
   20.13      ASSERT(d   != NULL);
   20.14  
   20.15 -    inf->mcu_advance = MCU_ADVANCE;
   20.16 -    inf->domain      = d->domain;
   20.17 +    if (d->eid == 0) {
   20.18 +        inf->mcu_advance = MCU_ADVANCE;
   20.19 +        inf->domain      = d->domain;
   20.20 +        inf->warpback    = 0;
   20.21 +        /* Set some default values here. */
   20.22 +        inf->warp        = 0;
   20.23 +        inf->warp_value  = 0;
   20.24 +        inf->warpl       = MILLISECS(2000);
   20.25 +        inf->warpu       = MILLISECS(1000);
   20.26 +        /* initialise the timers */
   20.27 +        init_ac_timer(&inf->warp_timer);
   20.28 +        inf->warp_timer.cpu = d->processor;
   20.29 +        inf->warp_timer.data = (unsigned long)inf;
   20.30 +        inf->warp_timer.function = &warp_timer_fn;
   20.31 +        init_ac_timer(&inf->unwarp_timer);
   20.32 +        inf->unwarp_timer.cpu = d->processor;
   20.33 +        inf->unwarp_timer.data = (unsigned long)inf;
   20.34 +        inf->unwarp_timer.function = &unwarp_timer_fn;
   20.35 +    }
   20.36 +
   20.37      einf->exec_domain = d;
   20.38 -    inf->warpback    = 0;
   20.39 -    /* Set some default values here. */
   20.40 -    inf->warp        = 0;
   20.41 -    inf->warp_value  = 0;
   20.42 -    inf->warpl       = MILLISECS(2000);
   20.43 -    inf->warpu       = MILLISECS(1000);
   20.44 -    /* initialise the timers */
   20.45 -    init_ac_timer(&inf->warp_timer);
   20.46 -    inf->warp_timer.cpu = d->processor;
   20.47 -    inf->warp_timer.data = (unsigned long)inf;
   20.48 -    inf->warp_timer.function = &warp_timer_fn;
   20.49 -    init_ac_timer(&inf->unwarp_timer);
   20.50 -    inf->unwarp_timer.cpu = d->processor;
   20.51 -    inf->unwarp_timer.data = (unsigned long)inf;
   20.52 -    inf->unwarp_timer.function = &unwarp_timer_fn;
   20.53 -    
   20.54 +
   20.55      if ( d->domain->id == IDLE_DOMAIN_ID )
   20.56      {
   20.57          einf->avt = einf->evt = ~0U;
    21.1 --- a/xen/common/schedule.c	Tue Nov 09 13:14:11 2004 +0000
    21.2 +++ b/xen/common/schedule.c	Wed Nov 10 11:00:27 2004 +0000
    21.3 @@ -106,32 +106,67 @@ void free_domain_struct(struct domain *d
    21.4      xmem_cache_free(domain_struct_cachep, d);
    21.5  }
    21.6  
    21.7 +struct exec_domain *alloc_exec_domain_struct(struct domain *d,
    21.8 +                                             unsigned long vcpu)
    21.9 +{
   21.10 +    struct exec_domain *ed, *edc;
   21.11 +
   21.12 +    ASSERT( d->exec_domain[vcpu] == NULL );
   21.13 +
   21.14 +    if ( (ed = xmem_cache_alloc(exec_domain_struct_cachep)) == NULL )
   21.15 +        return NULL;
   21.16 +
   21.17 +    memset(ed, 0, sizeof(*ed));
   21.18 +
   21.19 +    d->exec_domain[vcpu] = ed;
   21.20 +    ed->domain = d;
   21.21 +    ed->eid = vcpu;
   21.22 +
   21.23 +    if ( SCHED_OP(alloc_task, ed) < 0 )
   21.24 +        goto out;
   21.25 +
   21.26 +    if (vcpu != 0) {
   21.27 +        ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
   21.28 +
   21.29 +        for_each_exec_domain(d, edc) {
   21.30 +            if (edc->ed_next_list == NULL || edc->ed_next_list->eid > vcpu)
   21.31 +                break;
   21.32 +        }
   21.33 +        ed->ed_next_list = edc->ed_next_list;
   21.34 +        edc->ed_next_list = ed;
   21.35 +
   21.36 +        if (test_bit(EDF_CPUPINNED, &edc->ed_flags)) {
   21.37 +            ed->processor = (edc->processor + 1) % smp_num_cpus;
   21.38 +            set_bit(EDF_CPUPINNED, &ed->ed_flags);
   21.39 +        } else {
   21.40 +            ed->processor = (edc->processor + 1) % smp_num_cpus;  /* XXX */
   21.41 +        }
   21.42 +    }
   21.43 +
   21.44 +    return ed;
   21.45 +
   21.46 + out:
   21.47 +    d->exec_domain[vcpu] = NULL;
   21.48 +    xmem_cache_free(exec_domain_struct_cachep, ed);
   21.49 +
   21.50 +    return NULL;
   21.51 +}
   21.52 +
   21.53  struct domain *alloc_domain_struct(void)
   21.54  {
   21.55      struct domain *d;
   21.56 -    struct exec_domain *ed = NULL;
   21.57  
   21.58      if ( (d = xmem_cache_alloc(domain_struct_cachep)) == NULL )
   21.59          return NULL;
   21.60      
   21.61      memset(d, 0, sizeof(*d));
   21.62  
   21.63 -    if ( (ed = xmem_cache_alloc(exec_domain_struct_cachep)) == NULL )
   21.64 -        goto out;
   21.65 -
   21.66 -    memset(ed, 0, sizeof(*ed));
   21.67 -
   21.68 -    d->exec_domain[0] = ed;
   21.69 -    ed->domain = d;
   21.70 -
   21.71 -    if ( SCHED_OP(alloc_task, ed) < 0 )
   21.72 +    if ( alloc_exec_domain_struct(d, 0) == NULL )
   21.73          goto out;
   21.74  
   21.75      return d;
   21.76  
   21.77   out:
   21.78 -    if ( ed )
   21.79 -        xmem_cache_free(exec_domain_struct_cachep, ed);
   21.80      xmem_cache_free(domain_struct_cachep, d);
   21.81      return NULL;
   21.82  }
   21.83 @@ -139,31 +174,32 @@ struct domain *alloc_domain_struct(void)
   21.84  /*
   21.85   * Add and remove a domain
   21.86   */
   21.87 -void sched_add_domain(struct domain *d) 
   21.88 +void sched_add_domain(struct exec_domain *ed) 
   21.89  {
   21.90 -    struct exec_domain *ed;
   21.91 +    struct domain *d = ed->domain;
   21.92 +
   21.93 +    /* Must be unpaused by control software to start execution. */
   21.94 +    set_bit(EDF_CTRLPAUSE, &ed->ed_flags);
   21.95  
   21.96 -    for_each_exec_domain(d, ed) {
   21.97 -        /* Must be unpaused by control software to start execution. */
   21.98 -        set_bit(EDF_CTRLPAUSE, &ed->ed_flags);
   21.99 +    if (ed->eid == 0)
  21.100 +    {
  21.101 +        if ( d->id != IDLE_DOMAIN_ID )
  21.102 +        {
  21.103 +            /* Initialise the per-domain timer. */
  21.104 +            init_ac_timer(&d->timer);
  21.105 +            d->timer.cpu      = ed->processor;
  21.106 +            d->timer.data     = (unsigned long)d;
  21.107 +            d->timer.function = &dom_timer_fn;
  21.108 +        }
  21.109 +        else
  21.110 +        {
  21.111 +            schedule_data[ed->processor].idle = ed;
  21.112 +        }
  21.113      }
  21.114  
  21.115 -    if ( d->id != IDLE_DOMAIN_ID )
  21.116 -    {
  21.117 -        /* Initialise the per-domain timer. */
  21.118 -        init_ac_timer(&d->timer);
  21.119 -        d->timer.cpu      = d->exec_domain[0]->processor;
  21.120 -        d->timer.data     = (unsigned long)d;
  21.121 -        d->timer.function = &dom_timer_fn;
  21.122 -    }
  21.123 -    else
  21.124 -    {
  21.125 -        schedule_data[d->exec_domain[0]->processor].idle = d->exec_domain[0];
  21.126 -    }
  21.127 +    SCHED_OP(add_task, ed);
  21.128  
  21.129 -    SCHED_OP(add_task, d->exec_domain[0]);
  21.130 -
  21.131 -    TRACE_2D(TRC_SCHED_DOM_ADD, d->id, d);
  21.132 +    TRACE_2D(TRC_SCHED_DOM_ADD, d->id, ed);
  21.133  }
  21.134  
  21.135  void sched_rem_domain(struct domain *d) 
    22.1 --- a/xen/include/asm-x86/processor.h	Tue Nov 09 13:14:11 2004 +0000
    22.2 +++ b/xen/include/asm-x86/processor.h	Wed Nov 10 11:00:27 2004 +0000
    22.3 @@ -420,7 +420,7 @@ static inline void write_ptbase(struct m
    22.4  #define GET_GDT_ENTRIES(_p)     (((*(u16 *)((_p)->mm.gdt + 0))+1)>>3)
    22.5  #define GET_GDT_ADDRESS(_p)     (*(unsigned long *)((_p)->mm.gdt + 2))
    22.6  
    22.7 -void destroy_gdt(struct domain *d);
    22.8 +void destroy_gdt(struct exec_domain *d);
    22.9  long set_gdt(struct exec_domain *d, 
   22.10               unsigned long *frames, 
   22.11               unsigned int entries);
    23.1 --- a/xen/include/public/xen.h	Tue Nov 09 13:14:11 2004 +0000
    23.2 +++ b/xen/include/public/xen.h	Wed Nov 10 11:00:27 2004 +0000
    23.3 @@ -48,6 +48,7 @@
    23.4  #define __HYPERVISOR_grant_table_op       20
    23.5  #define __HYPERVISOR_vm_assist            21
    23.6  #define __HYPERVISOR_update_va_mapping_otherdomain 22
    23.7 +#define __HYPERVISOR_boot_vcpu            23
    23.8  
    23.9  /*
   23.10   * MULTICALLS
   23.11 @@ -287,6 +288,7 @@ typedef struct vcpu_info_st
   23.12      u8 evtchn_upcall_pending;
   23.13      u8 evtchn_upcall_mask;
   23.14      u8 pad0, pad1;
   23.15 +    u32 evtchn_pending_sel;             /* 132 */
   23.16  } PACKED vcpu_info_t;
   23.17  
   23.18  /*
   23.19 @@ -331,7 +333,6 @@ typedef struct shared_info_st
   23.20       * word in the PENDING bitfield array.
   23.21       */
   23.22      u32 evtchn_pending[32];             /*   4 */
   23.23 -    u32 evtchn_pending_sel;             /* 132 */
   23.24      u32 evtchn_mask[32];                /* 136 */
   23.25  
   23.26      /*
    24.1 --- a/xen/include/xen/event.h	Tue Nov 09 13:14:11 2004 +0000
    24.2 +++ b/xen/include/xen/event.h	Wed Nov 10 11:00:27 2004 +0000
    24.3 @@ -29,7 +29,7 @@ static inline void evtchn_set_pending(st
    24.4      /* These three operations must happen in strict order. */
    24.5      if ( !test_and_set_bit(port,    &s->evtchn_pending[0]) &&
    24.6           !test_bit        (port,    &s->evtchn_mask[0])    &&
    24.7 -         !test_and_set_bit(port>>5, &s->evtchn_pending_sel) )
    24.8 +         !test_and_set_bit(port>>5, &ed->vcpu_info->evtchn_pending_sel) )
    24.9      {
   24.10          /* The VCPU pending flag must be set /after/ update to evtchn-pend. */
   24.11          set_bit(0, &ed->vcpu_info->evtchn_upcall_pending);
    25.1 --- a/xen/include/xen/sched.h	Tue Nov 09 13:14:11 2004 +0000
    25.2 +++ b/xen/include/xen/sched.h	Wed Nov 10 11:00:27 2004 +0000
    25.3 @@ -178,6 +178,9 @@ extern struct exec_domain *idle_task[NR_
    25.4  #define IDLE_DOMAIN_ID   (0x7FFFU)
    25.5  #define is_idle_task(_p) (test_bit(DF_IDLETASK, &(_p)->d_flags))
    25.6  
    25.7 +struct exec_domain *alloc_exec_domain_struct(struct domain *d,
    25.8 +                                             unsigned long vcpu);
    25.9 +
   25.10  void free_domain_struct(struct domain *d);
   25.11  struct domain *alloc_domain_struct();
   25.12  
   25.13 @@ -242,7 +245,7 @@ extern unsigned long wait_init_idle;
   25.14  #define set_current_state(_s) do { current->state = (_s); } while (0)
   25.15  void scheduler_init(void);
   25.16  void schedulers_start(void);
   25.17 -void sched_add_domain(struct domain *d);
   25.18 +void sched_add_domain(struct exec_domain *d);
   25.19  void sched_rem_domain(struct domain *d);
   25.20  long sched_ctl(struct sched_ctl_cmd *);
   25.21  long sched_adjdom(struct sched_adjdom_cmd *);