ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/core/smpboot.c @ 8836:0828f5f18b56

Reset sp on vcpu_prepare; this avoids 'leaking' stack after repeated
save/restore/migrate iterations.

Should fix various crashes observed with save/restore/migrate of
multi-VCPU guests.

Signed-off-by: Steven Hand <steven@xensource.com>
author smh22@firebug.cl.cam.ac.uk
date Fri Feb 10 17:57:13 2006 +0100 (2006-02-10)
parents 0a404794aac1
children 765b0657264d
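
The reset referred to above is the pair of stack-pointer assignments in
vcpu_prepare() in the listing below: on every call the sp is derived afresh
from the idle task's saved kernel stack top rather than carried over from a
previous iteration. The i386 side (the x86_64 branch is analogous, using
thread.rsp0):

	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
	ctxt.kernel_sp = idle->thread.esp0;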
/*
 *	Xen SMP booting functions
 *
 *	See arch/i386/kernel/smpboot.c for copyright and credits for derived
 *	portions of this file.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/pgalloc.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
#include <xen/xenbus.h>

#ifdef CONFIG_SMP_ALTERNATIVES
#include <asm/smp_alt.h>
#endif
extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);

extern void local_setup_timer(unsigned int cpu);
extern void local_teardown_timer(unsigned int cpu);

extern void hypervisor_callback(void);
extern void failsafe_callback(void);
extern void system_call(void);
extern void smp_trap_init(trap_info_t *);
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
EXPORT_SYMBOL(phys_proc_id);
int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
EXPORT_SYMBOL(cpu_core_id);

cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);

#ifdef CONFIG_HOTPLUG_CPU
DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];

u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

void *xquad_portio;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

#if defined(__i386__)
u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
#elif !defined(CONFIG_X86_IO_APIC)
unsigned int maxcpus = NR_CPUS;
#endif
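
/*
 * Ask Xen which VCPU ids exist: VCPUOP_is_up fails with -ENOENT once the
 * id is out of range, so every id successfully probed is marked possible.
 */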
void __init prefill_possible_map(void)
{
	int i, rc;

	if (!cpus_empty(cpu_possible_map))
		return;

	for (i = 0; i < NR_CPUS; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc == -ENOENT)
			break;
		cpu_set(i, cpu_possible_map);
	}
}

void __init smp_alloc_memory(void)
{
}
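
/*
 * A paravirtualised guest has no local APIC; the reschedule and
 * call-function IPIs are bound to per-cpu Xen event channels instead.
 */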
static void xen_smp_intr_init(unsigned int cpu)
{
	sprintf(resched_name[cpu], "resched%d", cpu);
	per_cpu(resched_irq, cpu) =
		bind_ipi_to_irqhandler(
			RESCHEDULE_VECTOR,
			cpu,
			smp_reschedule_interrupt,
			SA_INTERRUPT,
			resched_name[cpu],
			NULL);
	BUG_ON(per_cpu(resched_irq, cpu) < 0);

	sprintf(callfunc_name[cpu], "callfunc%d", cpu);
	per_cpu(callfunc_irq, cpu) =
		bind_ipi_to_irqhandler(
			CALL_FUNCTION_VECTOR,
			cpu,
			smp_call_function_interrupt,
			SA_INTERRUPT,
			callfunc_name[cpu],
			NULL);
	BUG_ON(per_cpu(callfunc_irq, cpu) < 0);

	if (cpu != 0)
		local_setup_timer(cpu);
}
#ifdef CONFIG_HOTPLUG_CPU
static void xen_smp_intr_exit(unsigned int cpu)
{
	if (cpu != 0)
		local_teardown_timer(cpu);

	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
}
#endif
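
/*
 * Entry point for a freshly started secondary VCPU: vcpu_prepare() below
 * points the new context's eip here.
 */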
static void cpu_bringup(void)
{
	cpu_init();
	touch_softlockup_watchdog();
	preempt_disable();
	local_irq_enable();
	cpu_idle();
}
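
/*
 * Build the initial register state for a secondary VCPU and hand it to
 * Xen via VCPUOP_initialise. Per the changeset description, this runs
 * again across save/restore/migrate, so the stack pointer is recomputed
 * from thread.esp0/rsp0 each time rather than reused.
 */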
void vcpu_prepare(int vcpu)
{
	vcpu_guest_context_t ctxt;
	struct task_struct *idle = idle_task(vcpu);

	if (vcpu == 0)
		return;

	memset(&ctxt, 0, sizeof(ctxt));

	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.ds = __USER_DS;
	ctxt.user_regs.es = __USER_DS;
	ctxt.user_regs.fs = 0;
	ctxt.user_regs.gs = 0;
	ctxt.user_regs.ss = __KERNEL_DS;
	ctxt.user_regs.eip = (unsigned long)cpu_bringup;
	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;

	ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address);
	ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8;

#ifdef __i386__
	ctxt.user_regs.cs = __KERNEL_CS;
	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

	ctxt.kernel_ss = __KERNEL_DS;
	ctxt.kernel_sp = idle->thread.esp0;

	ctxt.event_callback_cs = __KERNEL_CS;
	ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_cs = __KERNEL_CS;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
#else /* __x86_64__ */
	ctxt.user_regs.cs = __KERNEL_CS | 3;
	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);

	ctxt.kernel_ss = __KERNEL_DS;
	ctxt.kernel_sp = idle->thread.rsp0;

	ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
	ctxt.syscall_callback_eip = (unsigned long)system_call;

	ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;

	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(vcpu));
#endif

	BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt));
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	int cpu;
	struct task_struct *idle;

	cpu_data[0] = boot_cpu_data;

	cpu_2_logical_apicid[0] = 0;
	x86_cpu_to_apicid[0] = 0;

	current_thread_info()->cpu = 0;
	cpu_sibling_map[0] = cpumask_of_cpu(0);
	cpu_core_map[0] = cpumask_of_cpu(0);

	xen_smp_intr_init(0);

	for_each_cpu_mask (cpu, cpu_possible_map) {
		if (cpu == 0)
			continue;

		cpu_data[cpu] = boot_cpu_data;
		cpu_2_logical_apicid[cpu] = cpu;
		x86_cpu_to_apicid[cpu] = cpu;

		idle = fork_idle(cpu);
		if (IS_ERR(idle))
			panic("failed fork for CPU %d", cpu);

#ifdef __x86_64__
		cpu_pda(cpu)->pcurrent = idle;
		cpu_pda(cpu)->cpunumber = cpu;
		per_cpu(init_tss,cpu).rsp0 = idle->thread.rsp;
		clear_ti_thread_flag(idle->thread_info, TIF_FORK);
#endif

		irq_ctx_init(cpu);

		cpu_gdt_descr[cpu].address =
			__get_free_page(GFP_KERNEL|__GFP_ZERO);
		BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
		cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
		memcpy((void *)cpu_gdt_descr[cpu].address,
		       (void *)cpu_gdt_descr[0].address,
		       cpu_gdt_descr[0].size);
		make_page_readonly(
			(void *)cpu_gdt_descr[cpu].address,
			XENFEAT_writable_descriptor_tables);

#ifdef CONFIG_HOTPLUG_CPU
		if (xen_start_info->flags & SIF_INITDOMAIN)
			cpu_set(cpu, cpu_present_map);
#else
		cpu_set(cpu, cpu_present_map);
#endif

		vcpu_prepare(cpu);
	}

	/* Currently, Xen gives no dynamic NUMA/HT info. */
	for (cpu = 1; cpu < NR_CPUS; cpu++) {
		cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
		cpu_core_map[cpu] = cpumask_of_cpu(cpu);
	}

#ifdef CONFIG_X86_IO_APIC
	/*
	 * Here we can be sure that there is an IO-APIC in the system. Let's
	 * go and set it up:
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
#endif
}
void __devinit smp_prepare_boot_cpu(void)
{
	prefill_possible_map();
	cpu_present_map = cpumask_of_cpu(0);
	cpu_online_map = cpumask_of_cpu(0);
}
#ifdef CONFIG_HOTPLUG_CPU

/*
 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
 * But do it early enough to catch critical for_each_present_cpu() loops
 * in i386-specific code.
 */
static int __init initialize_cpu_present_map(void)
{
	cpu_present_map = cpu_possible_map;
	return 0;
}
core_initcall(initialize_cpu_present_map);
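
/*
 * Bring a CPU online or offline to match the "cpu/%d/availability" node
 * in xenstore.
 */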
static void vcpu_hotplug(unsigned int cpu)
{
	int err;
	char dir[32], state[32];

	if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
		return;

	sprintf(dir, "cpu/%d", cpu);
	err = xenbus_scanf(XBT_NULL, dir, "availability", "%s", state);
	if (err != 1) {
		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
		return;
	}

	if (strcmp(state, "online") == 0) {
		(void)cpu_up(cpu);
	} else if (strcmp(state, "offline") == 0) {
		(void)cpu_down(cpu);
	} else {
		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
		       state, cpu);
	}
}
static void handle_vcpu_hotplug_event(
	struct xenbus_watch *watch, const char **vec, unsigned int len)
{
	int cpu;
	char *cpustr;
	const char *node = vec[XS_WATCH_PATH];

	if ((cpustr = strstr(node, "cpu/")) != NULL) {
		sscanf(cpustr, "cpu/%d", &cpu);
		vcpu_hotplug(cpu);
	}
}
static int setup_cpu_watcher(struct notifier_block *notifier,
			     unsigned long event, void *data)
{
	int i;

	static struct xenbus_watch cpu_watch = {
		.node = "cpu",
		.callback = handle_vcpu_hotplug_event };
	(void)register_xenbus_watch(&cpu_watch);

	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		for_each_cpu(i)
			vcpu_hotplug(i);
		printk(KERN_INFO "Brought up %ld CPUs\n",
		       (long)num_online_cpus());
	}

	return NOTIFY_DONE;
}

static int __init setup_vcpu_hotplug_event(void)
{
	static struct notifier_block xsn_cpu = {
		.notifier_call = setup_cpu_watcher };
	register_xenstore_notifier(&xsn_cpu);
	return 0;
}

arch_initcall(setup_vcpu_hotplug_event);
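
/*
 * Offline path: __cpu_disable() routes interrupts away from this CPU and
 * clears it from cpu_online_map; __cpu_die() then waits for the VCPU to
 * go down before tearing down its event channels.
 */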
int __cpu_disable(void)
{
	cpumask_t map = cpu_online_map;
	int cpu = smp_processor_id();

	if (cpu == 0)
		return -EBUSY;

	cpu_clear(cpu, map);
	fixup_irqs(map);
	cpu_clear(cpu, cpu_online_map);

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
		current->state = TASK_UNINTERRUPTIBLE;
		schedule_timeout(HZ/10);
	}

	xen_smp_intr_exit(cpu);

#ifdef CONFIG_SMP_ALTERNATIVES
	if (num_online_cpus() == 1)
		unprepare_for_smp();
#endif
}
#else /* !CONFIG_HOTPLUG_CPU */

int __cpu_disable(void)
{
	return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
	BUG();
}

#endif /* CONFIG_HOTPLUG_CPU */
int __devinit __cpu_up(unsigned int cpu)
{
#ifdef CONFIG_SMP_ALTERNATIVES
	if (num_online_cpus() == 1)
		prepare_for_smp();
#endif

	xen_smp_intr_init(cpu);
	cpu_set(cpu, cpu_online_map);
	if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL) != 0)
		BUG();

	return 0;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
}

int setup_profiling_timer(unsigned int multiplier)
{
	/* Dummy function. */
	return 0;
}
/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */