direct-io.hg

changeset 9458:5b1a10f9da4c

Clean up and fix VCPU hotplug and SMP save/restore.
1. No longer hold xenbus_lock while taking down VCPUs
in the SMP suspend path. This allows block device hotplug
to continue working, so we will not deadlock while the
userspace hotplug code is paged in.
2. Track both the xenbus-permitted and the local-admin-permitted
cpumasks of VCPUs to bring online. Thus, if a local admin takes a
CPU down, that decision will not unexpectedly be overridden the
next time the kernel interrogates xenstore.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Mar 28 18:43:30 2006 +0100 (2006-03-28)
parents 4109c4e7804a
children db8266c27c4f
files linux-2.6-xen-sparse/drivers/xen/core/reboot.c linux-2.6-xen-sparse/drivers/xen/core/smpboot.c linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c linux-2.6-xen-sparse/include/xen/gnttab.h linux-2.6-xen-sparse/include/xen/xenbus.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Tue Mar 28 14:19:22 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Tue Mar 28 18:43:30 2006 +0100
     1.3 @@ -15,6 +15,7 @@
     1.4  #include <xen/xenbus.h>
     1.5  #include <linux/cpu.h>
     1.6  #include <linux/kthread.h>
     1.7 +#include <xen/gnttab.h>
     1.8  #include <xen/xencons.h>
     1.9  
    1.10  #if defined(__i386__) || defined(__x86_64__)
    1.11 @@ -76,31 +77,24 @@ static int shutting_down = SHUTDOWN_INVA
    1.12  static void __shutdown_handler(void *unused);
    1.13  static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
    1.14  
    1.15 -#ifndef CONFIG_HOTPLUG_CPU
    1.16 -#define cpu_down(x) (-EOPNOTSUPP)
    1.17 -#define cpu_up(x) (-EOPNOTSUPP)
    1.18 +#ifdef CONFIG_SMP
    1.19 +int  smp_suspend(void);
    1.20 +void smp_resume(void);
    1.21 +#else
    1.22 +#define smp_suspend()	(0)
    1.23 +#define smp_resume()	((void)0)
    1.24  #endif
    1.25  
    1.26 -
    1.27  static int __do_suspend(void *ignore)
    1.28  {
    1.29 -	int i, j, k, fpp;
    1.30 +	int i, j, k, fpp, err;
    1.31  
    1.32  	extern unsigned long max_pfn;
    1.33  	extern unsigned long *pfn_to_mfn_frame_list_list;
    1.34  	extern unsigned long *pfn_to_mfn_frame_list[];
    1.35  
    1.36 -	extern int gnttab_suspend(void);
    1.37 -	extern int gnttab_resume(void);
    1.38  	extern void time_resume(void);
    1.39  
    1.40 -#ifdef CONFIG_SMP
    1.41 -	cpumask_t prev_online_cpus;
    1.42 -	int vcpu_prepare(int vcpu);
    1.43 -#endif
    1.44 -
    1.45 -	int err = 0;
    1.46 -
    1.47  	BUG_ON(smp_processor_id() != 0);
    1.48  	BUG_ON(in_interrupt());
    1.49  
    1.50 @@ -110,40 +104,12 @@ static int __do_suspend(void *ignore)
    1.51  		return -EOPNOTSUPP;
    1.52  	}
    1.53  
    1.54 -#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
    1.55 -	if (num_online_cpus() > 1) {
    1.56 -		printk(KERN_WARNING "Can't suspend SMP guests "
    1.57 -		       "without CONFIG_HOTPLUG_CPU\n");
    1.58 -		return -EOPNOTSUPP;
    1.59 -	}
    1.60 -#endif
    1.61 +	err = smp_suspend();
    1.62 +	if (err)
    1.63 +		return err;
    1.64  
    1.65  	xenbus_suspend();
    1.66  
    1.67 -	lock_cpu_hotplug();
    1.68 -#ifdef CONFIG_SMP
    1.69 -	/*
    1.70 -	 * Take all other CPUs offline. We hold the hotplug mutex to
    1.71 -	 * avoid other processes bringing up CPUs under our feet.
    1.72 -	 */
    1.73 -	cpus_clear(prev_online_cpus);
    1.74 -	while (num_online_cpus() > 1) {
    1.75 -		for_each_online_cpu(i) {
    1.76 -			if (i == 0)
    1.77 -				continue;
    1.78 -			unlock_cpu_hotplug();
    1.79 -			err = cpu_down(i);
    1.80 -			lock_cpu_hotplug();
    1.81 -			if (err != 0) {
    1.82 -				printk(KERN_CRIT "Failed to take all CPUs "
    1.83 -				       "down: %d.\n", err);
    1.84 -				goto out_reenable_cpus;
    1.85 -			}
    1.86 -			cpu_set(i, prev_online_cpus);
    1.87 -		}
    1.88 -	}
    1.89 -#endif
    1.90 -
    1.91  	preempt_disable();
    1.92  
    1.93  #ifdef __i386__
    1.94 @@ -153,7 +119,6 @@ static int __do_suspend(void *ignore)
    1.95  
    1.96  	__cli();
    1.97  	preempt_enable();
    1.98 -	unlock_cpu_hotplug();
    1.99  
   1.100  	gnttab_suspend();
   1.101  
   1.102 @@ -203,30 +168,9 @@ static int __do_suspend(void *ignore)
   1.103  
   1.104  	xencons_resume();
   1.105  
   1.106 -#ifdef CONFIG_SMP
   1.107 -	for_each_cpu(i)
   1.108 -		vcpu_prepare(i);
   1.109 -
   1.110 -#endif
   1.111 -
   1.112 -	/*
   1.113 -	 * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
   1.114 -	 * the VCPU hotplug callback can race with our vcpu_prepare
   1.115 -	 */
   1.116  	xenbus_resume();
   1.117  
   1.118 -#ifdef CONFIG_SMP
   1.119 - out_reenable_cpus:
   1.120 -	for_each_cpu_mask(i, prev_online_cpus) {
   1.121 -		j = cpu_up(i);
   1.122 -		if ((j != 0) && !cpu_online(i)) {
   1.123 -			printk(KERN_CRIT "Failed to bring cpu "
   1.124 -			       "%d back up (%d).\n",
   1.125 -			       i, j);
   1.126 -			err = j;
   1.127 -		}
   1.128 -	}
   1.129 -#endif
   1.130 +	smp_resume();
   1.131  
   1.132  	return err;
   1.133  }
   1.134 @@ -334,7 +278,6 @@ static void shutdown_handler(struct xenb
   1.135  	kfree(str);
   1.136  }
   1.137  
   1.138 -#ifdef CONFIG_MAGIC_SYSRQ
   1.139  static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
   1.140  			  unsigned int len)
   1.141  {
   1.142 @@ -360,45 +303,35 @@ static void sysrq_handler(struct xenbus_
   1.143  	if (err == -EAGAIN)
   1.144  		goto again;
   1.145  
   1.146 -	if (sysrq_key != '\0') {
   1.147 +#ifdef CONFIG_MAGIC_SYSRQ
   1.148 +	if (sysrq_key != '\0')
   1.149  		handle_sysrq(sysrq_key, NULL, NULL);
   1.150 -	}
   1.151 +#endif
   1.152  }
   1.153 -#endif
   1.154  
   1.155  static struct xenbus_watch shutdown_watch = {
   1.156  	.node = "control/shutdown",
   1.157  	.callback = shutdown_handler
   1.158  };
   1.159  
   1.160 -#ifdef CONFIG_MAGIC_SYSRQ
   1.161  static struct xenbus_watch sysrq_watch = {
   1.162  	.node ="control/sysrq",
   1.163  	.callback = sysrq_handler
   1.164  };
   1.165 -#endif
   1.166  
   1.167  static int setup_shutdown_watcher(struct notifier_block *notifier,
   1.168                                    unsigned long event,
   1.169                                    void *data)
   1.170  {
   1.171 -	int err1 = 0;
   1.172 -#ifdef CONFIG_MAGIC_SYSRQ
   1.173 -	int err2 = 0;
   1.174 -#endif
   1.175 +	int err;
   1.176  
   1.177 -	err1 = register_xenbus_watch(&shutdown_watch);
   1.178 -#ifdef CONFIG_MAGIC_SYSRQ
   1.179 -	err2 = register_xenbus_watch(&sysrq_watch);
   1.180 -#endif
   1.181 -
   1.182 -	if (err1)
   1.183 +	err = register_xenbus_watch(&shutdown_watch);
   1.184 +	if (err)
   1.185  		printk(KERN_ERR "Failed to set shutdown watcher\n");
   1.186  
   1.187 -#ifdef CONFIG_MAGIC_SYSRQ
   1.188 -	if (err2)
   1.189 +	err = register_xenbus_watch(&sysrq_watch);
   1.190 +	if (err)
   1.191  		printk(KERN_ERR "Failed to set sysrq watcher\n");
   1.192 -#endif
   1.193  
   1.194  	return NOTIFY_DONE;
   1.195  }
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Tue Mar 28 14:19:22 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Tue Mar 28 18:43:30 2006 +0100
     2.3 @@ -79,6 +79,15 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
     2.4  unsigned int maxcpus = NR_CPUS;
     2.5  #endif
     2.6  
     2.7 +/*
     2.8 + * Set of CPUs that remote admin software will allow us to bring online.
     2.9 + * Notified to us via xenbus.
    2.10 + */
    2.11 +static cpumask_t xenbus_allowed_cpumask;
    2.12 +
    2.13 +/* Set of CPUs that local admin will allow us to bring online. */
    2.14 +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
    2.15 +
    2.16  void __init prefill_possible_map(void)
    2.17  {
    2.18  	int i, rc;
    2.19 @@ -146,7 +155,7 @@ static void cpu_bringup(void)
    2.20  	cpu_idle();
    2.21  }
    2.22  
    2.23 -void vcpu_prepare(int vcpu)
    2.24 +static void vcpu_prepare(int vcpu)
    2.25  {
    2.26  	vcpu_guest_context_t ctxt;
    2.27  	struct task_struct *idle = idle_task(vcpu);
    2.28 @@ -278,6 +287,8 @@ void __init smp_prepare_cpus(unsigned in
    2.29  		vcpu_prepare(cpu);
    2.30  	}
    2.31  
    2.32 +	xenbus_allowed_cpumask = cpu_present_map;
    2.33 +
    2.34  	/* Currently, Xen gives no dynamic NUMA/HT info. */
    2.35  	for (cpu = 1; cpu < NR_CPUS; cpu++) {
    2.36  		cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
    2.37 @@ -301,6 +312,15 @@ void __devinit smp_prepare_boot_cpu(void
    2.38  	cpu_online_map   = cpumask_of_cpu(0);
    2.39  }
    2.40  
    2.41 +static int local_cpu_hotplug_request(void)
    2.42 +{
    2.43 +	/*
    2.44 +	 * We assume a CPU hotplug request comes from local admin if it is made
    2.45 +	 * via a userspace process (i.e., one with a real mm_struct).
    2.46 +	 */
    2.47 +	return (current->mm != NULL);
    2.48 +}
    2.49 +
    2.50  #ifdef CONFIG_HOTPLUG_CPU
    2.51  
    2.52  /*
    2.53 @@ -331,8 +351,10 @@ static void vcpu_hotplug(unsigned int cp
    2.54  	}
    2.55  
    2.56  	if (strcmp(state, "online") == 0) {
    2.57 +		cpu_set(cpu, xenbus_allowed_cpumask);
    2.58  		(void)cpu_up(cpu);
    2.59  	} else if (strcmp(state, "offline") == 0) {
    2.60 +		cpu_clear(cpu, xenbus_allowed_cpumask);
    2.61  		(void)cpu_down(cpu);
    2.62  	} else {
    2.63  		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
    2.64 @@ -353,6 +375,22 @@ static void handle_vcpu_hotplug_event(
    2.65  	}
    2.66  }
    2.67  
    2.68 +static int smpboot_cpu_notify(struct notifier_block *notifier,
    2.69 +			      unsigned long action, void *hcpu)
    2.70 +{
    2.71 +	int cpu = (long)hcpu;
    2.72 +
    2.73 +	/*
    2.74 +	 * We do this in a callback notifier rather than __cpu_disable()
    2.75 +	 * because local_cpu_hotplug_request() does not work in the latter
    2.76 +	 * as it's always executed from within a stopmachine kthread.
    2.77 +	 */
    2.78 +	if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
    2.79 +		cpu_clear(cpu, local_allowed_cpumask);
    2.80 +
    2.81 +	return NOTIFY_OK;
    2.82 +}
    2.83 +
    2.84  static int setup_cpu_watcher(struct notifier_block *notifier,
    2.85  			      unsigned long event, void *data)
    2.86  {
    2.87 @@ -360,7 +398,8 @@ static int setup_cpu_watcher(struct noti
    2.88  
    2.89  	static struct xenbus_watch cpu_watch = {
    2.90  		.node = "cpu",
    2.91 -		.callback = handle_vcpu_hotplug_event };
    2.92 +		.callback = handle_vcpu_hotplug_event,
    2.93 +		.flags = XBWF_new_thread };
    2.94  	(void)register_xenbus_watch(&cpu_watch);
    2.95  
    2.96  	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
    2.97 @@ -375,14 +414,62 @@ static int setup_cpu_watcher(struct noti
    2.98  
    2.99  static int __init setup_vcpu_hotplug_event(void)
   2.100  {
   2.101 +	static struct notifier_block hotplug_cpu = {
   2.102 +		.notifier_call = smpboot_cpu_notify };
   2.103  	static struct notifier_block xsn_cpu = {
   2.104  		.notifier_call = setup_cpu_watcher };
   2.105 +
   2.106 +	register_cpu_notifier(&hotplug_cpu);
   2.107  	register_xenstore_notifier(&xsn_cpu);
   2.108 +
   2.109  	return 0;
   2.110  }
   2.111  
   2.112  arch_initcall(setup_vcpu_hotplug_event);
   2.113  
   2.114 +int smp_suspend(void)
   2.115 +{
   2.116 +	int i, err;
   2.117 +
   2.118 +	lock_cpu_hotplug();
   2.119 +
   2.120 +	/*
   2.121 +	 * Take all other CPUs offline. We hold the hotplug mutex to
   2.122 +	 * avoid other processes bringing up CPUs under our feet.
   2.123 +	 */
   2.124 +	while (num_online_cpus() > 1) {
   2.125 +		unlock_cpu_hotplug();
   2.126 +		for_each_online_cpu(i) {
   2.127 +			if (i == 0)
   2.128 +				continue;
   2.129 +			err = cpu_down(i);
   2.130 +			if (err) {
   2.131 +				printk(KERN_CRIT "Failed to take all CPUs "
   2.132 +				       "down: %d.\n", err);
   2.133 +				for_each_cpu(i)
   2.134 +					vcpu_hotplug(i);
   2.135 +				return err;
   2.136 +			}
   2.137 +		}
   2.138 +		lock_cpu_hotplug();
   2.139 +	}
   2.140 +
   2.141 +	return 0;
   2.142 +}
   2.143 +
   2.144 +void smp_resume(void)
   2.145 +{
   2.146 +	int i;
   2.147 +
   2.148 +	for_each_cpu(i)
   2.149 +		vcpu_prepare(i);
   2.150 +
   2.151 +	unlock_cpu_hotplug();
   2.152 +
   2.153 +	for_each_cpu(i)
   2.154 +		vcpu_hotplug(i);
   2.155 +}
   2.156 +
   2.157  int __cpu_disable(void)
   2.158  {
   2.159  	cpumask_t map = cpu_online_map;
   2.160 @@ -415,6 +502,20 @@ void __cpu_die(unsigned int cpu)
   2.161  
   2.162  #else /* !CONFIG_HOTPLUG_CPU */
   2.163  
   2.164 +int smp_suspend(void)
   2.165 +{
   2.166 +	if (num_online_cpus() > 1) {
   2.167 +		printk(KERN_WARNING "Can't suspend SMP guests "
   2.168 +		       "without CONFIG_HOTPLUG_CPU\n");
   2.169 +		return -EOPNOTSUPP;
   2.170 +	}
   2.171 +	return 0;
   2.172 +}
   2.173 +
   2.174 +void smp_resume(void)
   2.175 +{
   2.176 +}
   2.177 +
   2.178  int __cpu_disable(void)
   2.179  {
   2.180  	return -ENOSYS;
   2.181 @@ -429,6 +530,20 @@ void __cpu_die(unsigned int cpu)
   2.182  
   2.183  int __devinit __cpu_up(unsigned int cpu)
   2.184  {
   2.185 +	int rc;
   2.186 +
   2.187 +	if (local_cpu_hotplug_request()) {
   2.188 +		cpu_set(cpu, local_allowed_cpumask);
   2.189 +		if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
   2.190 +			printk("%s: attempt to bring up CPU %u disallowed by "
   2.191 +			       "remote admin.\n", __FUNCTION__, cpu);
   2.192 +			return -EBUSY;
   2.193 +		}
   2.194 +	} else if (!cpu_isset(cpu, local_allowed_cpumask) ||
   2.195 +		   !cpu_isset(cpu, xenbus_allowed_cpumask)) {
   2.196 +		return -EBUSY;
   2.197 +	}
   2.198 +
   2.199  #ifdef CONFIG_SMP_ALTERNATIVES
   2.200  	if (num_online_cpus() == 1)
   2.201  		prepare_for_smp();
   2.202 @@ -436,7 +551,9 @@ int __devinit __cpu_up(unsigned int cpu)
   2.203  
   2.204  	xen_smp_intr_init(cpu);
   2.205  	cpu_set(cpu, cpu_online_map);
   2.206 -	if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL) != 0)
   2.207 +
   2.208 +	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
   2.209 +	if (rc != 0)
   2.210  		BUG();
   2.211  
   2.212  	return 0;
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Tue Mar 28 14:19:22 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Tue Mar 28 18:43:30 2006 +0100
     3.3 @@ -16,7 +16,7 @@ static struct pciback_device *alloc_pdev
     3.4  {
     3.5  	struct pciback_device *pdev;
     3.6  
     3.7 -	pdev = kmalloc(sizeof(struct pciback_device), GFP_KERNEL);
     3.8 +	pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
     3.9  	if (pdev == NULL)
    3.10  		goto out;
    3.11  	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Tue Mar 28 14:19:22 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Tue Mar 28 18:43:30 2006 +0100
     4.3 @@ -685,6 +685,24 @@ void xs_resume(void)
     4.4  	up_write(&xs_state.suspend_mutex);
     4.5  }
     4.6  
     4.7 +static int xenwatch_handle_callback(void *data)
     4.8 +{
     4.9 +	struct xs_stored_msg *msg = data;
    4.10 +
    4.11 +	msg->u.watch.handle->callback(msg->u.watch.handle,
    4.12 +				      (const char **)msg->u.watch.vec,
    4.13 +				      msg->u.watch.vec_size);
    4.14 +
    4.15 +	kfree(msg->u.watch.vec);
    4.16 +	kfree(msg);
    4.17 +
    4.18 +	/* Kill this kthread if we were spawned just for this callback. */
    4.19 +	if (current->pid != xenwatch_pid)
    4.20 +		do_exit(0);
    4.21 +
    4.22 +	return 0;
    4.23 +}
    4.24 +
    4.25  static int xenwatch_thread(void *unused)
    4.26  {
    4.27  	struct list_head *ent;
    4.28 @@ -707,12 +725,11 @@ static int xenwatch_thread(void *unused)
    4.29  
    4.30  		if (ent != &watch_events) {
    4.31  			msg = list_entry(ent, struct xs_stored_msg, list);
    4.32 -			msg->u.watch.handle->callback(
    4.33 -				msg->u.watch.handle,
    4.34 -				(const char **)msg->u.watch.vec,
    4.35 -				msg->u.watch.vec_size);
    4.36 -			kfree(msg->u.watch.vec);
    4.37 -			kfree(msg);
    4.38 +			if (msg->u.watch.handle->flags & XBWF_new_thread)
    4.39 +				kthread_run(xenwatch_handle_callback,
    4.40 +					    msg, "xenwatch_cb");
    4.41 +			else
    4.42 +				xenwatch_handle_callback(msg);
    4.43  		}
    4.44  
    4.45  		mutex_unlock(&xenwatch_mutex);
     5.1 --- a/linux-2.6-xen-sparse/include/xen/gnttab.h	Tue Mar 28 14:19:22 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h	Tue Mar 28 18:43:30 2006 +0100
     5.3 @@ -110,6 +110,9 @@ void gnttab_grant_foreign_transfer_ref(g
     5.4  #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
     5.5  #endif
     5.6  
     5.7 +int gnttab_suspend(void);
     5.8 +int gnttab_resume(void);
     5.9 +
    5.10  #endif /* __ASM_GNTTAB_H__ */
    5.11  
    5.12  /*
     6.1 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h	Tue Mar 28 14:19:22 2006 +0100
     6.2 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h	Tue Mar 28 18:43:30 2006 +0100
     6.3 @@ -55,8 +55,17 @@ struct xenbus_watch
     6.4  	/* Callback (executed in a process context with no locks held). */
     6.5  	void (*callback)(struct xenbus_watch *,
     6.6  			 const char **vec, unsigned int len);
     6.7 +
     6.8 +	/* See XBWF_ definitions below. */
     6.9 +	unsigned long flags;
    6.10  };
    6.11  
    6.12 +/*
    6.13 + * Execute callback in its own kthread. Useful if the callback is long
    6.14 + * running or heavily serialised, to avoid taking out the main xenwatch thread
    6.15 + * for a long period of time (or even unwittingly causing a deadlock).
    6.16 + */
    6.17 +#define XBWF_new_thread	1
    6.18  
    6.19  /* A xenbus device. */
    6.20  struct xenbus_device {