ia64/xen-unstable

changeset 15585:c3929e540632

Provide CPU hotplug support to Xen. Note that this hotplug
support is specific to PM (power management), rather than
run-time hotplug of a single CPU, which can be handled as a
separate task. See the embedded comment:

/*
* XXX: One important thing missed here is to migrate vcpus
* from dead cpu to other online ones and then put whole
* system into a stop state. It assures a safe environment
* for a cpu hotplug/remove at normal running state.
*
* However for xen PM case, at this point:
* -> All other domains should be notified with PM event,
* and then in following states:
* * Suspend state, or
* * Paused state, which is a force step to all
* domains if they do nothing to suspend
* -> All vcpus of dom0 (except vcpu0) have already been
* hot removed
* with the net effect that all other cpus only have idle vcpu
* running. In this special case, we can avoid vcpu migration
* then and system can be considered in a stop state.
*
* So current cpu hotplug is a special version for PM specific
* usage, and needs more effort later for full cpu hotplug.
* (ktian1)
*/
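
For illustration only (not part of this changeset): with the new cpu_down()/cpu_up()
interfaces, a PM suspend path could take every non-boot CPU offline and later bring
them back roughly as sketched below. The helper names and the frozen_cpus bookkeeping
are assumptions modelled on kernel/power/main.c, not code from this patch.

/* Hypothetical sketch -- uses only the interfaces added by this patch
 * (cpu_down/cpu_up) plus standard cpumask helpers. */
static cpumask_t frozen_cpus;

void disable_nonboot_cpus_sketch(void)
{
    unsigned int cpu;

    cpus_clear(frozen_cpus);
    for_each_online_cpu ( cpu )
    {
        if ( cpu == 0 )
            continue;                      /* never take the boot CPU down */
        if ( cpu_down(cpu) == 0 )
            cpu_set(cpu, frozen_cpus);     /* remember to resume it later */
        else
            printk("Error taking CPU%d down\n", cpu);
    }
}

void enable_nonboot_cpus_sketch(void)
{
    unsigned int cpu;

    for_each_cpu_mask ( cpu, frozen_cpus )
        if ( cpu_up(cpu) != 0 )
            printk("Error bringing CPU%d up\n", cpu);
    cpus_clear(frozen_cpus);
}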

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Jul 11 17:48:10 2007 +0100 (2007-07-11)
parents e00547dcda09
children a9103c71565e
files xen/arch/x86/cpu/common.c xen/arch/x86/domain.c xen/arch/x86/i8259.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/smp.c xen/arch/x86/smpboot.c xen/include/asm-x86/config.h xen/include/asm-x86/smp.h xen/include/asm-x86/system.h
line diff
     1.1 --- a/xen/arch/x86/cpu/common.c	Wed Jul 11 17:28:09 2007 +0100
     1.2 +++ b/xen/arch/x86/cpu/common.c	Wed Jul 11 17:48:10 2007 +0100
     1.3 @@ -600,9 +600,5 @@ void __cpuinit cpu_uninit(void)
     1.4  {
     1.5  	int cpu = raw_smp_processor_id();
     1.6  	cpu_clear(cpu, cpu_initialized);
     1.7 -
     1.8 -	/* lazy TLB state */
     1.9 -	per_cpu(cpu_tlbstate, cpu).state = 0;
    1.10 -	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
    1.11  }
    1.12  #endif
     2.1 --- a/xen/arch/x86/domain.c	Wed Jul 11 17:28:09 2007 +0100
     2.2 +++ b/xen/arch/x86/domain.c	Wed Jul 11 17:48:10 2007 +0100
     2.3 @@ -81,24 +81,23 @@ static void default_idle(void)
     2.4  /* We don't actually take CPU down, just spin without interrupts. */
     2.5  static inline void play_dead(void)
     2.6  {
     2.7 -	/* This must be done before dead CPU ack */
     2.8 -	cpu_exit_clear();
     2.9 -	wbinvd();
    2.10 -	mb();
    2.11 -	/* Ack it */
    2.12 -	__get_cpu_var(cpu_state) = CPU_DEAD;
    2.13 +    __cpu_disable();
    2.14 +    /* This must be done before dead CPU ack */
    2.15 +    cpu_exit_clear();
    2.16 +    wbinvd();
    2.17 +    mb();
    2.18 +    /* Ack it */
    2.19 +    __get_cpu_var(cpu_state) = CPU_DEAD;
    2.20  
    2.21 -	/*
    2.22 -	 * With physical CPU hotplug, we should halt the cpu
    2.23 -	 */
    2.24 -	local_irq_disable();
    2.25 -	while (1)
    2.26 -		halt();
    2.27 +    /* With physical CPU hotplug, we should halt the cpu. */
    2.28 +    local_irq_disable();
    2.29 +    for ( ; ; )
    2.30 +        halt();
    2.31  }
    2.32  #else
    2.33  static inline void play_dead(void)
    2.34  {
    2.35 -	BUG();
    2.36 +    BUG();
    2.37  }
    2.38  #endif /* CONFIG_HOTPLUG_CPU */
    2.39  
    2.40 @@ -106,6 +105,8 @@ void idle_loop(void)
    2.41  {
    2.42      for ( ; ; )
    2.43      {
    2.44 +        if (cpu_is_offline(smp_processor_id()))
    2.45 +            play_dead();
    2.46          page_scrub_schedule_work();
    2.47          default_idle();
    2.48          do_softirq();
     3.1 --- a/xen/arch/x86/i8259.c	Wed Jul 11 17:28:09 2007 +0100
     3.2 +++ b/xen/arch/x86/i8259.c	Wed Jul 11 17:48:10 2007 +0100
     3.3 @@ -396,6 +396,7 @@ void __init init_IRQ(void)
     3.4          irq_desc[i].action  = NULL;
     3.5          irq_desc[i].depth   = 1;
     3.6          spin_lock_init(&irq_desc[i].lock);
     3.7 +        cpus_setall(irq_desc[i].affinity);
     3.8          set_intr_gate(i, interrupt[i]);
     3.9      }
    3.10  
     4.1 --- a/xen/arch/x86/io_apic.c	Wed Jul 11 17:28:09 2007 +0100
     4.2 +++ b/xen/arch/x86/io_apic.c	Wed Jul 11 17:48:10 2007 +0100
     4.3 @@ -35,9 +35,6 @@
     4.4  #include <mach_apic.h>
     4.5  #include <io_ports.h>
     4.6  
     4.7 -#define set_irq_info(irq, mask) ((void)0)
     4.8 -#define set_native_irq_info(irq, mask) ((void)0)
     4.9 -
    4.10  /* Different to Linux: our implementation can be simpler. */
    4.11  #define make_8259A_irq(irq) (io_apic_irqs &= ~(1<<(irq)))
    4.12  
     5.1 --- a/xen/arch/x86/irq.c	Wed Jul 11 17:28:09 2007 +0100
     5.2 +++ b/xen/arch/x86/irq.c	Wed Jul 11 17:48:10 2007 +0100
     5.3 @@ -656,42 +656,34 @@ static int __init setup_dump_irqs(void)
     5.4  __initcall(setup_dump_irqs);
     5.5  
     5.6  #ifdef CONFIG_HOTPLUG_CPU
     5.7 -#include <mach_apic.h>
     5.8 +#include <asm/mach-generic/mach_apic.h>
     5.9 +#include <xen/delay.h>
    5.10  
    5.11  void fixup_irqs(cpumask_t map)
    5.12  {
    5.13 -	unsigned int irq;
    5.14 -	static int warned;
    5.15 +    unsigned int irq;
    5.16 +    static int warned;
    5.17  
    5.18 -	for (irq = 0; irq < NR_IRQS; irq++) {
    5.19 -		cpumask_t mask;
    5.20 -		if (irq == 2)
    5.21 -			continue;
    5.22 +    for ( irq = 0; irq < NR_IRQS; irq++ )
    5.23 +    {
    5.24 +        cpumask_t mask;
    5.25 +        if ( irq == 2 )
    5.26 +            continue;
    5.27  
    5.28 -		cpus_and(mask, irq_desc[irq].affinity, map);
    5.29 -		if (any_online_cpu(mask) == NR_CPUS) {
    5.30 -			printk("Breaking affinity for irq %i\n", irq);
    5.31 -			mask = map;
    5.32 -		}
    5.33 -		if (irq_desc[irq].chip->set_affinity)
    5.34 -			irq_desc[irq].chip->set_affinity(irq, mask);
    5.35 -		else if (irq_desc[irq].action && !(warned++))
    5.36 -			printk("Cannot set affinity for irq %i\n", irq);
    5.37 -	}
    5.38 +        cpus_and(mask, irq_desc[irq].affinity, map);
    5.39 +        if ( any_online_cpu(mask) == NR_CPUS )
    5.40 +        {
    5.41 +            printk("Breaking affinity for irq %i\n", irq);
    5.42 +            mask = map;
    5.43 +        }
    5.44 +        if ( irq_desc[irq].handler->set_affinity )
    5.45 +            irq_desc[irq].handler->set_affinity(irq, mask);
    5.46 +        else if ( irq_desc[irq].action && !(warned++) )
    5.47 +            printk("Cannot set affinity for irq %i\n", irq);
    5.48 +    }
    5.49  
    5.50 -#if 0
    5.51 -	barrier();
    5.52 -	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
    5.53 -	   [note the nop - the interrupt-enable boundary on x86 is two
    5.54 -	   instructions from sti] - to flush out pending hardirqs and
    5.55 -	   IPIs. After this point nothing is supposed to reach this CPU." */
    5.56 -	__asm__ __volatile__("sti; nop; cli");
    5.57 -	barrier();
    5.58 -#else
    5.59 -	/* That doesn't seem sufficient.  Give it 1ms. */
    5.60 -	local_irq_enable();
    5.61 -	mdelay(1);
    5.62 -	local_irq_disable();
    5.63 -#endif
    5.64 +    local_irq_enable();
    5.65 +    mdelay(1);
    5.66 +    local_irq_disable();
    5.67  }
    5.68  #endif
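
For context, a hedged sketch of the expected caller: the extern declaration added in
smpboot.c suggests fixup_irqs() is invoked from __cpu_disable() with the online map
minus the CPU being removed, so that IRQs still targeted at the dying CPU have their
affinity broken and are re-routed. The exact call site is outside the hunks shown, so
treat the following condensation as an assumption.

/* Hypothetical condensation of the __cpu_disable() call path. */
static void reroute_irqs_from_dying_cpu_sketch(unsigned int cpu)
{
    cpumask_t map = cpu_online_map;

    cpu_clear(cpu, map);       /* every online CPU except the one going away */
    fixup_irqs(map);           /* break affinity of IRQs still bound to it */
}
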
     6.1 --- a/xen/arch/x86/smp.c	Wed Jul 11 17:28:09 2007 +0100
     6.2 +++ b/xen/arch/x86/smp.c	Wed Jul 11 17:48:10 2007 +0100
     6.3 @@ -256,16 +256,6 @@ struct call_data_struct {
     6.4  static DEFINE_SPINLOCK(call_lock);
     6.5  static struct call_data_struct *call_data;
     6.6  
     6.7 -void lock_ipi_call_lock(void)
     6.8 -{
     6.9 -	spin_lock_irq(&call_lock);
    6.10 -}
    6.11 -
    6.12 -void unlock_ipi_call_lock(void)
    6.13 -{
    6.14 -	spin_unlock_irq(&call_lock);
    6.15 -}
    6.16 -
    6.17  int smp_call_function(
    6.18      void (*func) (void *info),
    6.19      void *info,
     7.1 --- a/xen/arch/x86/smpboot.c	Wed Jul 11 17:28:09 2007 +0100
     7.2 +++ b/xen/arch/x86/smpboot.c	Wed Jul 11 17:48:10 2007 +0100
     7.3 @@ -110,6 +110,11 @@ u8 x86_cpu_to_apicid[NR_CPUS] __read_mos
     7.4  EXPORT_SYMBOL(x86_cpu_to_apicid);
     7.5  
     7.6  static void map_cpu_to_logical_apicid(void);
     7.7 +/* State of each CPU. */
     7.8 +DEFINE_PER_CPU(int, cpu_state) = { 0 };
     7.9 +
    7.10 +static void *stack_base[NR_CPUS] __cacheline_aligned;
    7.11 +spinlock_t cpu_add_remove_lock;
    7.12  
    7.13  /*
    7.14   * The bootstrap kernel entry code has set these up. Save them for
    7.15 @@ -396,9 +401,11 @@ void __devinit smp_callin(void)
    7.16  	/*
    7.17  	 *      Synchronize the TSC with the BP
    7.18  	 */
    7.19 -	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
    7.20 +	if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) {
    7.21  		synchronize_tsc_ap();
    7.22 -	calibrate_tsc_ap();
    7.23 +		/* No sync for same reason as above */
    7.24 +		calibrate_tsc_ap();
    7.25 +	}
    7.26  }
    7.27  
    7.28  static int cpucount, booting_cpu;
    7.29 @@ -464,8 +471,12 @@ static void construct_percpu_idt(unsigne
    7.30  {
    7.31  	unsigned char idt_load[10];
    7.32  
    7.33 -	idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
    7.34 -	memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
    7.35 +	/* If IDT table exists since last hotplug, reuse it */
    7.36 +	if (!idt_tables[cpu]) {
    7.37 +		idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
    7.38 +		memcpy(idt_tables[cpu], idt_table,
    7.39 +				IDT_ENTRIES*sizeof(idt_entry_t));
    7.40 +	}
    7.41  
    7.42  	*(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
    7.43  	*(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
    7.44 @@ -488,7 +499,7 @@ void __devinit start_secondary(void *unu
    7.45  
    7.46  	set_processor_id(cpu);
    7.47  	set_current(idle_vcpu[cpu]);
    7.48 -        this_cpu(curr_vcpu) = idle_vcpu[cpu];
    7.49 +	this_cpu(curr_vcpu) = idle_vcpu[cpu];
    7.50  
    7.51  	percpu_traps_init();
    7.52  
    7.53 @@ -516,24 +527,14 @@ void __devinit start_secondary(void *unu
    7.54  	set_cpu_sibling_map(raw_smp_processor_id());
    7.55  	wmb();
    7.56  
    7.57 -	/*
    7.58 -	 * We need to hold call_lock, so there is no inconsistency
    7.59 -	 * between the time smp_call_function() determines number of
    7.60 -	 * IPI receipients, and the time when the determination is made
    7.61 -	 * for which cpus receive the IPI. Holding this
    7.62 -	 * lock helps us to not include this cpu in a currently in progress
    7.63 -	 * smp_call_function().
    7.64 -	 */
    7.65 -	/*lock_ipi_call_lock();*/
    7.66  	cpu_set(smp_processor_id(), cpu_online_map);
    7.67 -	/*unlock_ipi_call_lock();*/
    7.68 -	/*per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;*/
    7.69 +	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
    7.70 +
    7.71 +	init_percpu_time();
    7.72  
    7.73  	/* We can take interrupts now: we're officially "up". */
    7.74  	local_irq_enable();
    7.75  
    7.76 -        init_percpu_time();
    7.77 -
    7.78  	wmb();
    7.79  	startup_cpu_idle_loop();
    7.80  }
    7.81 @@ -794,6 +795,22 @@ static inline int alloc_cpu_id(void)
    7.82  	return cpu;
    7.83  }
    7.84  
    7.85 +static struct vcpu *prepare_idle_vcpu(unsigned int cpu)
    7.86 +{
    7.87 +	if (idle_vcpu[cpu])
    7.88 +		return idle_vcpu[cpu];
    7.89 +
    7.90 +	return alloc_idle_vcpu(cpu);
    7.91 +}
    7.92 +
    7.93 +static void *prepare_idle_stack(unsigned int cpu)
    7.94 +{
    7.95 +	if (!stack_base[cpu])
    7.96 +		stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER);
    7.97 +
    7.98 +	return stack_base[cpu];
    7.99 +}
   7.100 +
   7.101  static int __devinit do_boot_cpu(int apicid, int cpu)
   7.102  /*
   7.103   * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
   7.104 @@ -811,7 +828,7 @@ static int __devinit do_boot_cpu(int api
   7.105  
   7.106  	booting_cpu = cpu;
   7.107  
   7.108 -	v = alloc_idle_vcpu(cpu);
   7.109 +	v = prepare_idle_vcpu(cpu);
   7.110  	BUG_ON(v == NULL);
   7.111  
   7.112  	/* start_eip had better be page-aligned! */
   7.113 @@ -820,7 +837,7 @@ static int __devinit do_boot_cpu(int api
   7.114  	/* So we see what's up   */
   7.115  	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
   7.116  
   7.117 -	stack_start.esp = alloc_xenheap_pages(STACK_ORDER);
   7.118 +	stack_start.esp = prepare_idle_stack(cpu);
   7.119  
   7.120  	/* Debug build: detect stack overflow by setting up a guard page. */
   7.121  	memguard_guard_stack(stack_start.esp);
   7.122 @@ -898,6 +915,12 @@ static int __devinit do_boot_cpu(int api
   7.123  }
   7.124  
   7.125  #ifdef CONFIG_HOTPLUG_CPU
   7.126 +static void idle_task_exit(void)
   7.127 +{
   7.128 +	/* Give up lazy state borrowed by this idle vcpu */
   7.129 +	__sync_lazy_execstate();
   7.130 +}
   7.131 +
   7.132  void cpu_exit_clear(void)
   7.133  {
   7.134  	int cpu = raw_smp_processor_id();
   7.135 @@ -906,7 +929,6 @@ void cpu_exit_clear(void)
   7.136  
   7.137  	cpucount --;
   7.138  	cpu_uninit();
   7.139 -	irq_ctx_exit(cpu);
   7.140  
   7.141  	cpu_clear(cpu, cpu_callout_map);
   7.142  	cpu_clear(cpu, cpu_callin_map);
   7.143 @@ -915,26 +937,9 @@ void cpu_exit_clear(void)
   7.144  	unmap_cpu_to_logical_apicid(cpu);
   7.145  }
   7.146  
   7.147 -struct warm_boot_cpu_info {
   7.148 -	struct completion *complete;
   7.149 -	int apicid;
   7.150 -	int cpu;
   7.151 -};
   7.152 -
   7.153 -static void __cpuinit do_warm_boot_cpu(void *p)
   7.154 -{
   7.155 -	struct warm_boot_cpu_info *info = p;
   7.156 -	do_boot_cpu(info->apicid, info->cpu);
   7.157 -	complete(info->complete);
   7.158 -}
   7.159 -
   7.160  static int __cpuinit __smp_prepare_cpu(int cpu)
   7.161  {
   7.162 -	DECLARE_COMPLETION(done);
   7.163 -	struct warm_boot_cpu_info info;
   7.164 -	struct work_struct task;
   7.165  	int	apicid, ret;
   7.166 -	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
   7.167  
   7.168  	apicid = x86_cpu_to_apicid[cpu];
   7.169  	if (apicid == BAD_APICID) {
   7.170 @@ -942,34 +947,12 @@ static int __cpuinit __smp_prepare_cpu(i
   7.171  		goto exit;
   7.172  	}
   7.173  
   7.174 -	/*
   7.175 -	 * the CPU isn't initialized at boot time, allocate gdt table here.
   7.176 -	 * cpu_init will initialize it
   7.177 -	 */
   7.178 -	if (!cpu_gdt_descr->address) {
   7.179 -		cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL);
   7.180 -		if (!cpu_gdt_descr->address)
   7.181 -			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
   7.182 -			ret = -ENOMEM;
   7.183 -			goto exit;
   7.184 -	}
   7.185 -
   7.186 -	info.complete = &done;
   7.187 -	info.apicid = apicid;
   7.188 -	info.cpu = cpu;
   7.189 -	INIT_WORK(&task, do_warm_boot_cpu, &info);
   7.190 -
   7.191  	tsc_sync_disabled = 1;
   7.192  
   7.193 -	/* init low mem mapping */
   7.194 -	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
   7.195 -			KERNEL_PGD_PTRS);
   7.196 -	flush_tlb_all();
   7.197 -	schedule_work(&task);
   7.198 -	wait_for_completion(&done);
   7.199 +	do_boot_cpu(apicid, cpu);
   7.200  
   7.201  	tsc_sync_disabled = 0;
   7.202 -	zap_low_mappings();
   7.203 +
   7.204  	ret = 0;
   7.205  exit:
   7.206  	return ret;
   7.207 @@ -1003,6 +986,8 @@ static void __init smp_boot_cpus(unsigne
   7.208  	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
   7.209  	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
   7.210  
   7.211 +	stack_base[0] = stack_start.esp;
   7.212 +
   7.213  	/*current_thread_info()->cpu = 0;*/
   7.214  	/*smp_tune_scheduling();*/
   7.215  
   7.216 @@ -1173,7 +1158,8 @@ void __devinit smp_prepare_boot_cpu(void
   7.217  	cpu_set(smp_processor_id(), cpu_callout_map);
   7.218  	cpu_set(smp_processor_id(), cpu_present_map);
   7.219  	cpu_set(smp_processor_id(), cpu_possible_map);
   7.220 -	/*per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;*/
   7.221 +	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
   7.222 +	spin_lock_init(&cpu_add_remove_lock);
   7.223  }
   7.224  
   7.225  #ifdef CONFIG_HOTPLUG_CPU
   7.226 @@ -1196,11 +1182,12 @@ remove_siblinginfo(int cpu)
   7.227  		cpu_clear(cpu, cpu_sibling_map[sibling]);
   7.228  	cpus_clear(cpu_sibling_map[cpu]);
   7.229  	cpus_clear(cpu_core_map[cpu]);
   7.230 -	c[cpu].phys_proc_id = 0;
   7.231 -	c[cpu].cpu_core_id = 0;
   7.232 +	phys_proc_id[cpu] = BAD_APICID;
   7.233 +	cpu_core_id[cpu] = BAD_APICID;
   7.234  	cpu_clear(cpu, cpu_sibling_setup_map);
   7.235  }
   7.236  
   7.237 +extern void fixup_irqs(cpumask_t map);
   7.238  int __cpu_disable(void)
   7.239  {
   7.240  	cpumask_t map = cpu_online_map;
   7.241 @@ -1217,12 +1204,15 @@ int __cpu_disable(void)
   7.242  	if (cpu == 0)
   7.243  		return -EBUSY;
   7.244  
   7.245 +	local_irq_disable();
   7.246  	clear_local_APIC();
   7.247  	/* Allow any queued timer interrupts to get serviced */
   7.248  	local_irq_enable();
   7.249  	mdelay(1);
   7.250  	local_irq_disable();
   7.251  
   7.252 +	time_suspend();
   7.253 +
   7.254  	remove_siblinginfo(cpu);
   7.255  
   7.256  	cpu_clear(cpu, map);
   7.257 @@ -1241,15 +1231,91 @@ void __cpu_die(unsigned int cpu)
   7.258  		/* They ack this in play_dead by setting CPU_DEAD */
   7.259  		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
   7.260  			printk ("CPU %d is now offline\n", cpu);
   7.261 -			if (1 == num_online_cpus())
   7.262 -				alternatives_smp_switch(0);
   7.263  			return;
   7.264  		}
   7.265 -		msleep(100);
   7.266 +		mdelay(100);
   7.267 +		mb();
   7.268 +		process_pending_timers();
   7.269  	}
   7.270   	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
   7.271  }
   7.272  
   7.273 +/* 
   7.274 + * XXX: One important thing missed here is to migrate vcpus
   7.275 + * from dead cpu to other online ones and then put whole
   7.276 + * system into a stop state. It assures a safe environment
   7.277 + * for a cpu hotplug/remove at normal running state.
   7.278 + *
   7.279 + * However for xen PM case, at this point:
   7.280 + * 	-> All other domains should be notified with PM event,
   7.281 + *	   and then in following states:
   7.282 + *		* Suspend state, or
   7.283 + *		* Paused state, which is a force step to all
   7.284 + *		  domains if they do nothing to suspend
    7.285 + *	   -> All vcpus of dom0 (except vcpu0) have already been
   7.286 + *	   hot removed
   7.287 + * with the net effect that all other cpus only have idle vcpu
   7.288 + * running. In this special case, we can avoid vcpu migration
   7.289 + * then and system can be considered in a stop state.
   7.290 + *
   7.291 + * So current cpu hotplug is a special version for PM specific
    7.292 + * usage, and needs more effort later for full cpu hotplug.
   7.293 + * (ktian1)
   7.294 + */
   7.295 +int cpu_down(unsigned int cpu)
   7.296 +{
   7.297 +	int err = 0;
   7.298 +	cpumask_t mask;
   7.299 +
   7.300 +	spin_lock(&cpu_add_remove_lock);
   7.301 +	if (num_online_cpus() == 1) {
   7.302 +		err = -EBUSY;
   7.303 +		goto out;
   7.304 +	}
   7.305 +
   7.306 +	if (!cpu_online(cpu)) {
   7.307 +		err = -EINVAL;
   7.308 +		goto out;
   7.309 +	}
   7.310 +
   7.311 +	printk("Prepare to bring CPU%d down...\n", cpu);
   7.312 +	/* Send notification to remote idle vcpu */
   7.313 +	cpus_clear(mask);
   7.314 +	cpu_set(cpu, mask);
   7.315 +	per_cpu(cpu_state, cpu) = CPU_DYING;
   7.316 +	smp_send_event_check_mask(mask);
   7.317 +
   7.318 +	__cpu_die(cpu);
   7.319 +
   7.320 +	if (cpu_online(cpu)) {
   7.321 +		printk("Bad state (DEAD, but in online map) on CPU%d\n", cpu);
   7.322 +		err = -EBUSY;
   7.323 +	}
   7.324 +out:
   7.325 +	spin_unlock(&cpu_add_remove_lock);
   7.326 +	return err;
   7.327 +}
   7.328 +
   7.329 +int cpu_up(unsigned int cpu)
   7.330 +{
   7.331 +	int err = 0;
   7.332 +
   7.333 +	spin_lock(&cpu_add_remove_lock);
   7.334 +	if (cpu_online(cpu)) {
    7.335 +		printk("Bring up an online cpu. Bogus!\n");
   7.336 +		err = -EBUSY;
   7.337 +		goto out;
   7.338 +	}
   7.339 +
   7.340 +	err = __cpu_up(cpu);
   7.341 +	if (err < 0)
   7.342 +		goto out;
   7.343 +
   7.344 +out:
   7.345 +	spin_unlock(&cpu_add_remove_lock);
   7.346 +	return err;
   7.347 +}
   7.348 +
   7.349  /* From kernel/power/main.c */
   7.350  /* This is protected by pm_sem semaphore */
   7.351  static cpumask_t frozen_cpus;
   7.352 @@ -1308,6 +1374,22 @@ void __cpu_die(unsigned int cpu)
   7.353  
   7.354  int __devinit __cpu_up(unsigned int cpu)
   7.355  {
   7.356 +#ifdef CONFIG_HOTPLUG_CPU
   7.357 +	int ret=0;
   7.358 +
   7.359 +	/*
   7.360 +	 * We do warm boot only on cpus that had booted earlier
   7.361 +	 * Otherwise cold boot is all handled from smp_boot_cpus().
   7.362 +	 * cpu_callin_map is set during AP kickstart process. Its reset
   7.363 +	 * when a cpu is taken offline from cpu_exit_clear().
   7.364 +	 */
   7.365 +	if (!cpu_isset(cpu, cpu_callin_map))
   7.366 +		ret = __smp_prepare_cpu(cpu);
   7.367 +
   7.368 +	if (ret)
   7.369 +		return -EIO;
   7.370 +#endif
   7.371 +
   7.372  	/* In case one didn't come up */
   7.373  	if (!cpu_isset(cpu, cpu_callin_map)) {
   7.374  		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
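
Taking the domain.c and smpboot.c changes together, the offline handshake introduced
here runs roughly as condensed below. This is a restatement of the hunks above with
the locking and error checks stripped out, not additional code; the wrapper name is
hypothetical.

/* Condensed restatement of cpu_down()'s handshake, for orientation only. */
int take_cpu_offline_sketch(unsigned int cpu)
{
    cpumask_t mask;

    per_cpu(cpu_state, cpu) = CPU_DYING;  /* ask the target CPU to die */
    cpus_clear(mask);
    cpu_set(cpu, mask);
    smp_send_event_check_mask(mask);      /* kick its idle vcpu out of default_idle() */

    /*
     * On the target CPU, idle_loop() notices cpu_is_offline() and calls
     * play_dead(): __cpu_disable() removes it from cpu_online_map and fixes
     * up IRQs, cpu_exit_clear() tears down per-CPU bookkeeping, cpu_state
     * becomes CPU_DEAD, and the CPU then halt()s forever.
     */

    __cpu_die(cpu);                       /* poll until cpu_state == CPU_DEAD */
    return cpu_online(cpu) ? -EBUSY : 0;
}
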
     8.1 --- a/xen/include/asm-x86/config.h	Wed Jul 11 17:28:09 2007 +0100
     8.2 +++ b/xen/include/asm-x86/config.h	Wed Jul 11 17:48:10 2007 +0100
     8.3 @@ -40,6 +40,9 @@
     8.4  
     8.5  #define CONFIG_VGA 1
     8.6  
     8.7 +#define CONFIG_HOTPLUG 1
     8.8 +#define CONFIG_HOTPLUG_CPU 1
     8.9 +
    8.10  #define HZ 100
    8.11  
    8.12  #define OPT_CONSOLE_STR "com1,vga"
     9.1 --- a/xen/include/asm-x86/smp.h	Wed Jul 11 17:28:09 2007 +0100
     9.2 +++ b/xen/include/asm-x86/smp.h	Wed Jul 11 17:48:10 2007 +0100
     9.3 @@ -50,9 +50,22 @@ extern u8 x86_cpu_to_apicid[];
     9.4  
     9.5  #define cpu_physical_id(cpu)	x86_cpu_to_apicid[cpu]
     9.6  
     9.7 +/* State of each CPU. */
     9.8 +#define CPU_ONLINE	0x0002	/* CPU is up */
     9.9 +#define CPU_DYING	0x0003	/* CPU is requested to die */
    9.10 +#define CPU_DEAD	0x0004	/* CPU is dead */
    9.11 +DECLARE_PER_CPU(int, cpu_state);
    9.12 +
    9.13  #ifdef CONFIG_HOTPLUG_CPU
    9.14 +#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) == CPU_DYING)
    9.15 +extern int cpu_down(unsigned int cpu);
    9.16 +extern int cpu_up(unsigned int cpu);
    9.17  extern void cpu_exit_clear(void);
    9.18  extern void cpu_uninit(void);
    9.19 +extern void disable_nonboot_cpus(void);
    9.20 +extern void enable_nonboot_cpus(void);
    9.21 +#else
    9.22 +static inline int cpu_is_offline(int cpu) {return 0;}
    9.23  #endif
    9.24  
    9.25  /*
    10.1 --- a/xen/include/asm-x86/system.h	Wed Jul 11 17:28:09 2007 +0100
    10.2 +++ b/xen/include/asm-x86/system.h	Wed Jul 11 17:48:10 2007 +0100
    10.3 @@ -313,6 +313,8 @@ static always_inline unsigned long long 
    10.4  #define __sti()			__asm__ __volatile__("sti": : :"memory")
    10.5  /* used in the idle loop; sti takes one instruction cycle to complete */
    10.6  #define safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
    10.7 +/* used when interrupts are already enabled or to shutdown the processor */
    10.8 +#define halt()			__asm__ __volatile__("hlt": : :"memory")
    10.9  
   10.10  /* For spinlocks etc */
   10.11  #if defined(__i386__)