ia64/xen-unstable

changeset 17037:2a3111016f88

Rendezvous selected cpus in softirq (stop_machine).

This is similar to the stop_machine_run stub from Linux: it pulls
selected cpus into a rendezvous point and then does some batch work
under a safe environment. The current sole usage is the S3 path,
where an individual cpu is pulled down with its related online
footprints being cleared. It's dangerous to have other cpus
checking clobbered data structures in the middle, such as
cpu_online_map, cpu_sibling_map, etc.

Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Feb 11 15:59:49 2008 +0000 (2008-02-11)
parents 7b0c0ab0566b
children 181578832957
files xen/arch/x86/cpu/mtrr/main.c xen/arch/x86/domain.c xen/arch/x86/smpboot.c xen/common/Makefile xen/common/stop_machine.c xen/include/asm-x86/smp.h xen/include/xen/smp.h xen/include/xen/softirq.h xen/include/xen/stop_machine.h
line diff
     1.1 --- a/xen/arch/x86/cpu/mtrr/main.c	Mon Feb 11 14:55:33 2008 +0000
     1.2 +++ b/xen/arch/x86/cpu/mtrr/main.c	Mon Feb 11 15:59:49 2008 +0000
     1.3 @@ -46,8 +46,6 @@
     1.4  #define DEFINE_MUTEX(_m) DEFINE_SPINLOCK(_m)
     1.5  #define mutex_lock(_m) spin_lock(_m)
     1.6  #define mutex_unlock(_m) spin_unlock(_m)
     1.7 -#define lock_cpu_hotplug() ((void)0)
     1.8 -#define unlock_cpu_hotplug() ((void)0)
     1.9  #define dump_stack() ((void)0)
    1.10  #define	get_cpu()	smp_processor_id()
    1.11  #define put_cpu()	do {} while(0)
     2.1 --- a/xen/arch/x86/domain.c	Mon Feb 11 14:55:33 2008 +0000
     2.2 +++ b/xen/arch/x86/domain.c	Mon Feb 11 15:59:49 2008 +0000
     2.3 @@ -82,7 +82,6 @@ static void default_idle(void)
     2.4  
     2.5  static void play_dead(void)
     2.6  {
     2.7 -    __cpu_disable();
     2.8      /* This must be done before dead CPU ack */
     2.9      cpu_exit_clear();
    2.10      hvm_cpu_down();
    2.11 @@ -101,7 +100,7 @@ void idle_loop(void)
    2.12  {
    2.13      for ( ; ; )
    2.14      {
    2.15 -        if (cpu_is_offline(smp_processor_id()))
    2.16 +        if ( cpu_is_offline(smp_processor_id()) )
    2.17              play_dead();
    2.18          page_scrub_schedule_work();
    2.19          default_idle();
     3.1 --- a/xen/arch/x86/smpboot.c	Mon Feb 11 14:55:33 2008 +0000
     3.2 +++ b/xen/arch/x86/smpboot.c	Mon Feb 11 15:59:49 2008 +0000
     3.3 @@ -54,6 +54,7 @@
     3.4  #include <mach_apic.h>
     3.5  #include <mach_wakecpu.h>
     3.6  #include <smpboot_hooks.h>
     3.7 +#include <xen/stop_machine.h>
     3.8  
     3.9  #define set_kernel_exec(x, y) (0)
    3.10  #define setup_trampoline()    (bootsym_phys(trampoline_realmode_entry))
    3.11 @@ -1208,6 +1209,15 @@ int __cpu_disable(void)
    3.12  	if (cpu == 0)
    3.13  		return -EBUSY;
    3.14  
    3.15 +	/*
    3.16 +	 * Only S3 is using this path, and thus idle vcpus are running on all
    3.17 +	 * APs when we are called. To support full cpu hotplug, other 
    3.18 +	 * notification mechanisms should be introduced (e.g., migrate vcpus
    3.19 +	 * off this physical cpu before rendezvous point).
    3.20 +	 */
    3.21 +	if (!is_idle_vcpu(current))
    3.22 +		return -EINVAL;
    3.23 +
    3.24  	local_irq_disable();
    3.25  	clear_local_APIC();
    3.26  	/* Allow any queued timer interrupts to get serviced */
    3.27 @@ -1244,6 +1254,11 @@ void __cpu_die(unsigned int cpu)
    3.28   	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
    3.29  }
    3.30  
    3.31 +static int take_cpu_down(void *unused)
    3.32 +{
    3.33 +    return __cpu_disable();
    3.34 +}
    3.35 +
    3.36  /* 
    3.37   * XXX: One important thing missed here is to migrate vcpus
    3.38   * from dead cpu to other online ones and then put whole
    3.39 @@ -1269,7 +1284,6 @@ void __cpu_die(unsigned int cpu)
    3.40  int cpu_down(unsigned int cpu)
    3.41  {
    3.42  	int err = 0;
    3.43 -	cpumask_t mask;
    3.44  
    3.45  	spin_lock(&cpu_add_remove_lock);
    3.46  	if (num_online_cpus() == 1) {
    3.47 @@ -1283,11 +1297,10 @@ int cpu_down(unsigned int cpu)
    3.48  	}
    3.49  
    3.50  	printk("Prepare to bring CPU%d down...\n", cpu);
    3.51 -	/* Send notification to remote idle vcpu */
    3.52 -	cpus_clear(mask);
    3.53 -	cpu_set(cpu, mask);
    3.54 -	per_cpu(cpu_state, cpu) = CPU_DYING;
    3.55 -	smp_send_event_check_mask(mask);
    3.56 +
    3.57 +	err = __stop_machine_run(take_cpu_down, NULL, cpu);
    3.58 +	if ( err < 0 )
    3.59 +		goto out;
    3.60  
    3.61  	__cpu_die(cpu);
    3.62  
     4.1 --- a/xen/common/Makefile	Mon Feb 11 14:55:33 2008 +0000
     4.2 +++ b/xen/common/Makefile	Mon Feb 11 15:59:49 2008 +0000
     4.3 @@ -16,6 +16,7 @@ obj-y += sched_sedf.o
     4.4  obj-y += schedule.o
     4.5  obj-y += shutdown.o
     4.6  obj-y += softirq.o
     4.7 +obj-y += stop_machine.o
     4.8  obj-y += string.o
     4.9  obj-y += symbols.o
    4.10  obj-y += sysctl.o
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/xen/common/stop_machine.c	Mon Feb 11 15:59:49 2008 +0000
     5.3 @@ -0,0 +1,168 @@
     5.4 +/******************************************************************************
     5.5 + * common/stop_machine.c
     5.6 + *
     5.7 + * Facilities to put whole machine in a safe 'stop' state
     5.8 + *
     5.9 + * Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation
    5.10 + * Copyright 2008 Kevin Tian <kevin.tian@intel.com>, Intel Corporation.
    5.11 + *
    5.12 + * This program is free software; you can redistribute it and/or modify it
    5.13 + * under the terms and conditions of the GNU General Public License,
    5.14 + * version 2, as published by the Free Software Foundation.
    5.15 + *
    5.16 + * This program is distributed in the hope it will be useful, but WITHOUT
    5.17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    5.18 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
    5.19 + * more details.
    5.20 + *
    5.21 + * You should have received a copy of the GNU General Public License along with
    5.22 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
    5.23 + * Place - Suite 330, Boston, MA 02111-1307 USA.
    5.24 + */
    5.25 +
    5.26 +#include <xen/config.h>
    5.27 +#include <xen/init.h>
    5.28 +#include <xen/spinlock.h>
    5.29 +#include <asm/smp.h>
    5.30 +#include <asm/current.h>
    5.31 +#include <xen/softirq.h>
    5.32 +#include <asm/processor.h>
    5.33 +#include <xen/errno.h>
    5.34 +
    5.35 +enum stopmachine_state {
    5.36 +    STOPMACHINE_START,
    5.37 +    STOPMACHINE_PREPARE,
    5.38 +    STOPMACHINE_DISABLE_IRQ,
    5.39 +    STOPMACHINE_INVOKE,
    5.40 +    STOPMACHINE_EXIT
    5.41 +};
    5.42 +
    5.43 +struct stopmachine_data {
    5.44 +    unsigned int nr_cpus;
    5.45 +
    5.46 +    enum stopmachine_state state;
    5.47 +    atomic_t done;
    5.48 +
    5.49 +    unsigned int fn_cpu;
    5.50 +    int fn_result;
    5.51 +    int (*fn)(void *);
    5.52 +    void *fn_data;
    5.53 +};
    5.54 +
    5.55 +static struct stopmachine_data stopmachine_data;
    5.56 +static DEFINE_SPINLOCK(stopmachine_lock);
    5.57 +
    5.58 +static void stopmachine_set_state(enum stopmachine_state state)
    5.59 +{
    5.60 +    atomic_set(&stopmachine_data.done, 0);
    5.61 +    smp_wmb();
    5.62 +    stopmachine_data.state = state;
    5.63 +    while ( atomic_read(&stopmachine_data.done) != stopmachine_data.nr_cpus )
    5.64 +        cpu_relax();
    5.65 +}
    5.66 +
    5.67 +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
    5.68 +{
    5.69 +    cpumask_t allbutself;
    5.70 +    unsigned int i, nr_cpus;
    5.71 +    int ret;
    5.72 +
    5.73 +    BUG_ON(!local_irq_is_enabled());
    5.74 +
    5.75 +    allbutself = cpu_online_map;
    5.76 +    cpu_clear(smp_processor_id(), allbutself);
    5.77 +    nr_cpus = cpus_weight(allbutself);
    5.78 +
    5.79 +    if ( nr_cpus == 0 )
    5.80 +    {
    5.81 +        BUG_ON(cpu != smp_processor_id());
    5.82 +        return (*fn)(data);
    5.83 +    }
    5.84 +
     5.85 +    /* Note: We shouldn't spin on the lock when it's held by another cpu,
     5.86 +     * since the holder expects this cpu to enter softirq context; otherwise
     5.87 +     * deadlock results.
     5.88 +     */
    5.89 +    if ( !spin_trylock(&stopmachine_lock) )
    5.90 +        return -EBUSY;
    5.91 +
    5.92 +    stopmachine_data.fn = fn;
    5.93 +    stopmachine_data.fn_data = data;
    5.94 +    stopmachine_data.nr_cpus = nr_cpus;
    5.95 +    stopmachine_data.fn_cpu = cpu;
    5.96 +    atomic_set(&stopmachine_data.done, 0);
    5.97 +    stopmachine_data.state = STOPMACHINE_START;
    5.98 +
    5.99 +    smp_wmb();
   5.100 +
   5.101 +    for_each_cpu_mask ( i, allbutself )
   5.102 +        cpu_raise_softirq(i, STOPMACHINE_SOFTIRQ);
   5.103 +
   5.104 +    stopmachine_set_state(STOPMACHINE_PREPARE);
   5.105 +
   5.106 +    local_irq_disable();
   5.107 +    stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
   5.108 +
   5.109 +    if ( cpu == smp_processor_id() )
   5.110 +        stopmachine_data.fn_result = (*fn)(data);
   5.111 +    stopmachine_set_state(STOPMACHINE_INVOKE);
   5.112 +    ret = stopmachine_data.fn_result;
   5.113 +
   5.114 +    stopmachine_set_state(STOPMACHINE_EXIT);
   5.115 +    local_irq_enable();
   5.116 +
   5.117 +    spin_unlock(&stopmachine_lock);
   5.118 +
   5.119 +    return ret;
   5.120 +}
   5.121 +
   5.122 +int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
   5.123 +{
   5.124 +    int ret;
   5.125 +
   5.126 +    lock_cpu_hotplug();
   5.127 +    ret = __stop_machine_run(fn, data, cpu);
   5.128 +    unlock_cpu_hotplug();
   5.129 +
   5.130 +    return ret;
   5.131 +}
   5.132 +
   5.133 +static void stopmachine_softirq(void)
   5.134 +{
   5.135 +    enum stopmachine_state state = STOPMACHINE_START;
   5.136 +
   5.137 +    smp_mb();
   5.138 +
   5.139 +    while ( state != STOPMACHINE_EXIT )
   5.140 +    {
   5.141 +        while ( stopmachine_data.state == state )
   5.142 +            cpu_relax();
   5.143 +
   5.144 +        state = stopmachine_data.state;
   5.145 +        switch ( state )
   5.146 +        {
   5.147 +        case STOPMACHINE_DISABLE_IRQ:
   5.148 +            local_irq_disable();
   5.149 +            break;
   5.150 +        case STOPMACHINE_INVOKE:
   5.151 +            if ( stopmachine_data.fn_cpu == smp_processor_id() )
   5.152 +                stopmachine_data.fn_result =
   5.153 +                    stopmachine_data.fn(stopmachine_data.fn_data);
   5.154 +            break;
   5.155 +        default:
   5.156 +            break;
   5.157 +        }
   5.158 +
   5.159 +        smp_mb();
   5.160 +        atomic_inc(&stopmachine_data.done);
   5.161 +    }
   5.162 +
   5.163 +    local_irq_enable();
   5.164 +}
   5.165 +
   5.166 +static int __init cpu_stopmachine_init(void)
   5.167 +{
   5.168 +    open_softirq(STOPMACHINE_SOFTIRQ, stopmachine_softirq);
   5.169 +    return 0;
   5.170 +}
   5.171 +__initcall(cpu_stopmachine_init);
     6.1 --- a/xen/include/asm-x86/smp.h	Mon Feb 11 14:55:33 2008 +0000
     6.2 +++ b/xen/include/asm-x86/smp.h	Mon Feb 11 15:59:49 2008 +0000
     6.3 @@ -51,12 +51,11 @@ extern u8 x86_cpu_to_apicid[];
     6.4  
     6.5  /* State of each CPU. */
     6.6  #define CPU_ONLINE	0x0002	/* CPU is up */
     6.7 -#define CPU_DYING	0x0003	/* CPU is requested to die */
     6.8  #define CPU_DEAD	0x0004	/* CPU is dead */
     6.9  DECLARE_PER_CPU(int, cpu_state);
    6.10  
    6.11  #ifdef CONFIG_HOTPLUG_CPU
    6.12 -#define cpu_is_offline(cpu) unlikely(per_cpu(cpu_state,cpu) == CPU_DYING)
    6.13 +#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
    6.14  extern int cpu_down(unsigned int cpu);
    6.15  extern int cpu_up(unsigned int cpu);
    6.16  extern void cpu_exit_clear(void);
     7.1 --- a/xen/include/xen/smp.h	Mon Feb 11 14:55:33 2008 +0000
     7.2 +++ b/xen/include/xen/smp.h	Mon Feb 11 15:59:49 2008 +0000
     7.3 @@ -112,4 +112,21 @@ static inline int on_each_cpu(
     7.4  
     7.5  #define smp_processor_id() raw_smp_processor_id()
     7.6  
     7.7 +#ifdef CONFIG_HOTPLUG_CPU
     7.8 +extern spinlock_t cpu_add_remove_lock;
     7.9 +/*
    7.10 + * FIXME: need a better lock mechanism when real cpu hotplug is later
     7.12 + * supported, since the spinlock may cause deadlock:
    7.12 + *     cpu0: in stop_machine with lock held. Wait for cpu1 to respond
    7.13 + *           to stop request
    7.14 + *     cpu1: spin loop on lock upon cpu hotplug request from guest,
    7.15 + *           without chance to handle softirq
    7.16 + * ...
    7.17 + */
    7.18 +#define lock_cpu_hotplug() spin_lock(&cpu_add_remove_lock);
    7.19 +#define unlock_cpu_hotplug() spin_unlock(&cpu_add_remove_lock);
    7.20 +#else
    7.21 +#define lock_cpu_hotplug() do { } while ( 0 )
    7.22 +#define unlock_cpu_hotplug() do { } while ( 0 )
    7.23  #endif
    7.24 +#endif
     8.1 --- a/xen/include/xen/softirq.h	Mon Feb 11 14:55:33 2008 +0000
     8.2 +++ b/xen/include/xen/softirq.h	Mon Feb 11 15:59:49 2008 +0000
     8.3 @@ -10,8 +10,9 @@
     8.4  #define PAGE_SCRUB_SOFTIRQ                5
     8.5  #define TRACE_SOFTIRQ                     6
     8.6  #define RCU_SOFTIRQ                       7
     8.7 +#define STOPMACHINE_SOFTIRQ               8
     8.8  
     8.9 -#define NR_COMMON_SOFTIRQS                8
    8.10 +#define NR_COMMON_SOFTIRQS                9
    8.11  
    8.12  #include <asm/softirq.h>
    8.13  
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/include/xen/stop_machine.h	Mon Feb 11 15:59:49 2008 +0000
     9.3 @@ -0,0 +1,30 @@
     9.4 +#ifndef __XEN_STOP_MACHINE_H__
     9.5 +#define __XEN_STOP_MACHINE_H__
     9.6 +
     9.7 +/**
     9.8 + * stop_machine_run: freeze the machine on all CPUs and run this function
     9.9 + * @fn: the function to run
    9.10 + * @data: the data ptr for the @fn()
    9.11 + * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
    9.12 + *
     9.13 + * Description: This causes every other cpu to enter a safe point, where
     9.14 + * each of them disables interrupts, and finally interrupts are disabled
     9.15 + * on the current CPU.  The result is that no cpu is holding a spinlock
     9.16 + * or is inside any other preempt-disabled region when @fn() runs.
    9.17 + *
    9.18 + * This can be thought of as a very heavy write lock, equivalent to
    9.19 + * grabbing every spinlock in the kernel. */
    9.20 +int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
    9.21 +
    9.22 +/**
    9.23 + * __stop_machine_run: freeze the machine on all CPUs and run this function
    9.24 + * @fn: the function to run
    9.25 + * @data: the data ptr for the @fn
     9.26 + * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS).
    9.27 + *
    9.28 + * Description: This is a special version of the above, without explicit
    9.29 + * lock acquisition. Used by hotplug cpu.
    9.30 + */
    9.31 +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
    9.32 +
    9.33 +#endif /* __XEN_STOP_MACHINE_H__ */