ia64/linux-2.6.18-xen.hg

view drivers/xen/core/smpboot.c @ 466:26e1e96bd46a

x86 xen: New vcpu_op call to get physical CPU identity.

Some AMD machines have APIC IDs that are not equal to their CPU IDs. In
the default Xen configuration, ACPI calls on these machines can get
confused. This shows up most noticeably when running AMD PowerNow!. The
only solution is for dom0 to get the hypervisor's cpuid-to-apicid table
when needed (i.e., when dom0 vcpus are pinned).

Add a vcpu op to Xen that allows dom0 to query the hypervisor for
architecture-dependent physical CPU information when dom0 vcpus are
pinned.

Signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Mar 05 11:09:41 2008 +0000 (2008-03-05)
parents eb906651be99
children 271d9b9bee40
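
In outline, the new op is used as in the sketch below (a condensed
version of the smp_prepare_cpus() logic in this file; the call fails
for a vcpu that is not pinned to a physical CPU, so the vcpu number is
kept as the fallback APIC ID):

        struct vcpu_get_physid cpu_id;
        int apicid = cpu;       /* fallback: assume APIC ID == vcpu ID */

        if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
                /* Pinned vcpu: recover the real APIC and ACPI IDs. */
                int acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
                apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
                if (acpiid != 0xff)
                        x86_acpiid_to_apicid[acpiid] = apicid;
        }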
line source
/*
 * Xen SMP booting functions
 *
 * See arch/i386/kernel/smpboot.c for copyright and credits for derived
 * portions of this file.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/pgalloc.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>

extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);

extern int local_setup_timer(unsigned int cpu);
extern void local_teardown_timer(unsigned int cpu);

extern void hypervisor_callback(void);
extern void failsafe_callback(void);
extern void system_call(void);
extern void smp_trap_init(trap_info_t *);

/* Number of siblings per CPU package */
int smp_num_siblings = 1;

cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
cpumask_t cpu_initialized_map;

struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);

#ifdef CONFIG_HOTPLUG_CPU
DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];

u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

#if defined(__i386__)
u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);
#elif !defined(CONFIG_X86_IO_APIC)
unsigned int maxcpus = NR_CPUS;
#endif
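
/*
 * A vcpu is "possible" if the hypervisor knows about it: VCPUOP_is_up
 * returns >= 0 (whether the vcpu is up or down) for any vcpu that
 * exists, and an error otherwise.
 */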
void __init prefill_possible_map(void)
{
        int i, rc;

        /* If any CPU beyond the boot CPU is already marked possible,
         * the map has been filled in and there is nothing to do. */
        for_each_possible_cpu(i)
                if (i != smp_processor_id())
                        return;

        for (i = 0; i < NR_CPUS; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0)
                        cpu_set(i, cpu_possible_map);
        }
}

void __init smp_alloc_memory(void)
{
}

/*
 * Present each vcpu to the kernel as its own single-core,
 * single-thread package.
 */
static inline void
set_cpu_sibling_map(unsigned int cpu)
{
        cpu_data[cpu].phys_proc_id = cpu;
        cpu_data[cpu].cpu_core_id = 0;

        cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
        cpu_core_map[cpu] = cpumask_of_cpu(cpu);

        cpu_data[cpu].booted_cores = 1;
}

static void
remove_siblinginfo(unsigned int cpu)
{
        cpu_data[cpu].phys_proc_id = BAD_APICID;
        cpu_data[cpu].cpu_core_id = BAD_APICID;

        cpus_clear(cpu_sibling_map[cpu]);
        cpus_clear(cpu_core_map[cpu]);

        cpu_data[cpu].booted_cores = 0;
}

static int __cpuinit xen_smp_intr_init(unsigned int cpu)
{
        int rc;

        per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

        sprintf(resched_name[cpu], "resched%u", cpu);
        rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
                                    cpu,
                                    smp_reschedule_interrupt,
                                    SA_INTERRUPT,
                                    resched_name[cpu],
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(resched_irq, cpu) = rc;

        sprintf(callfunc_name[cpu], "callfunc%u", cpu);
        rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
                                    cpu,
                                    smp_call_function_interrupt,
                                    SA_INTERRUPT,
                                    callfunc_name[cpu],
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(callfunc_irq, cpu) = rc;

        if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
                goto fail;

        return 0;

 fail:
        if (per_cpu(resched_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
        if (per_cpu(callfunc_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
        return rc;
}

#ifdef CONFIG_HOTPLUG_CPU
static void xen_smp_intr_exit(unsigned int cpu)
{
        if (cpu != 0)
                local_teardown_timer(cpu);

        unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
}
#endif

void __cpuinit cpu_bringup(void)
{
        cpu_init();
        identify_cpu(cpu_data + smp_processor_id());
        touch_softlockup_watchdog();
        preempt_disable();
        local_irq_enable();
}

static void __cpuinit cpu_bringup_and_idle(void)
{
        cpu_bringup();
        cpu_idle();
}
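
/*
 * Build the initial register and descriptor-table state for a new vcpu
 * and register it with the hypervisor via VCPUOP_initialise.
 */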
static void __cpuinit cpu_initialize_context(unsigned int cpu)
{
        /* vcpu_guest_context_t is too large to allocate on the stack.
         * Hence we allocate statically and protect it with a lock */
        static vcpu_guest_context_t ctxt;
        static DEFINE_SPINLOCK(ctxt_lock);

        struct task_struct *idle = idle_task(cpu);
#ifdef __x86_64__
        struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
#else
        struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
#endif

        if (cpu_test_and_set(cpu, cpu_initialized_map))
                return;

        spin_lock(&ctxt_lock);

        memset(&ctxt, 0, sizeof(ctxt));

        ctxt.flags = VGCF_IN_KERNEL;
        ctxt.user_regs.ds = __USER_DS;
        ctxt.user_regs.es = __USER_DS;
        ctxt.user_regs.fs = 0;
        ctxt.user_regs.gs = 0;
        ctxt.user_regs.ss = __KERNEL_DS;
        ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */

        memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

        smp_trap_init(ctxt.trap_ctxt);

        ctxt.ldt_ents = 0;

        ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
        ctxt.gdt_ents = gdt_descr->size / 8;

#ifdef __i386__
        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

        ctxt.kernel_ss = __KERNEL_DS;
        ctxt.kernel_sp = idle->thread.esp0;

        ctxt.event_callback_cs = __KERNEL_CS;
        ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
        ctxt.failsafe_callback_cs = __KERNEL_CS;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
#else /* __x86_64__ */
        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);

        ctxt.kernel_ss = __KERNEL_DS;
        ctxt.kernel_sp = idle->thread.rsp0;

        ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
        ctxt.syscall_callback_eip = (unsigned long)system_call;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

        ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
                BUG();

        spin_unlock(&ctxt_lock);
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned int cpu;
        struct task_struct *idle;
        int apicid, acpiid;
        struct vcpu_get_physid cpu_id;
#ifdef __x86_64__
        struct desc_ptr *gdt_descr;
#else
        struct Xgt_desc_struct *gdt_descr;
#endif

        /* Learn the boot vcpu's physical identity (succeeds only if it
         * is pinned to a physical CPU). */
        apicid = 0;
        if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
                apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
                acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
#ifdef CONFIG_ACPI
                if (acpiid != 0xff)
                        x86_acpiid_to_apicid[acpiid] = apicid;
#endif
        }
        boot_cpu_data.apicid = apicid;
        cpu_data[0] = boot_cpu_data;

        cpu_2_logical_apicid[0] = apicid;
        x86_cpu_to_apicid[0] = apicid;

        current_thread_info()->cpu = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                cpus_clear(cpu_sibling_map[cpu]);
                cpus_clear(cpu_core_map[cpu]);
        }

        set_cpu_sibling_map(0);

        if (xen_smp_intr_init(0))
                BUG();

        cpu_initialized_map = cpumask_of_cpu(0);

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
                        continue;
                cpu_clear(cpu, cpu_possible_map);
        }

        for_each_possible_cpu (cpu) {
                if (cpu == 0)
                        continue;

#ifdef __x86_64__
                gdt_descr = &cpu_gdt_descr[cpu];
#else
                gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
#endif
                gdt_descr->address = get_zeroed_page(GFP_KERNEL);
                if (unlikely(!gdt_descr->address)) {
                        printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
                               cpu);
                        continue;
                }
                gdt_descr->size = GDT_SIZE;
                memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
                make_page_readonly(
                        (void *)gdt_descr->address,
                        XENFEAT_writable_descriptor_tables);

                apicid = cpu;
                if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
                        apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
                        acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
#ifdef CONFIG_ACPI
                        if (acpiid != 0xff)
                                x86_acpiid_to_apicid[acpiid] = apicid;
#endif
                }
                cpu_data[cpu] = boot_cpu_data;
                cpu_data[cpu].apicid = apicid;

                cpu_2_logical_apicid[cpu] = apicid;
                x86_cpu_to_apicid[cpu] = apicid;

                idle = fork_idle(cpu);
                if (IS_ERR(idle))
                        panic("failed fork for CPU %d", cpu);

#ifdef __x86_64__
                cpu_pda(cpu)->pcurrent = idle;
                cpu_pda(cpu)->cpunumber = cpu;
                clear_ti_thread_flag(idle->thread_info, TIF_FORK);
#endif

                irq_ctx_init(cpu);

#ifdef CONFIG_HOTPLUG_CPU
                if (is_initial_xendomain())
                        cpu_set(cpu, cpu_present_map);
#else
                cpu_set(cpu, cpu_present_map);
#endif
        }

        init_xenbus_allowed_cpumask();

#ifdef CONFIG_X86_IO_APIC
        /*
         * Here we can be sure that there is an IO-APIC in the system. Let's
         * go and set it up:
         */
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
#endif
}

void __devinit smp_prepare_boot_cpu(void)
{
        prefill_possible_map();
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
 * But do it early enough to catch critical for_each_present_cpu() loops
 * in i386-specific code.
 */
static int __init initialize_cpu_present_map(void)
{
        cpu_present_map = cpu_possible_map;
        return 0;
}
core_initcall(initialize_cpu_present_map);

int __cpu_disable(void)
{
        cpumask_t map = cpu_online_map;
        unsigned int cpu = smp_processor_id();

        if (cpu == 0)
                return -EBUSY;

        remove_siblinginfo(cpu);

        cpu_clear(cpu, map);
        fixup_irqs(map);
        cpu_clear(cpu, cpu_online_map);

        return 0;
}

void __cpu_die(unsigned int cpu)
{
        /* Wait until the hypervisor reports the vcpu as down before
         * tearing down its IPIs and timer. */
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
                current->state = TASK_UNINTERRUPTIBLE;
                schedule_timeout(HZ/10);
        }

        xen_smp_intr_exit(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(0);
}

#endif /* CONFIG_HOTPLUG_CPU */
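
/*
 * Bring a secondary vcpu online: initialise its context, bind its IPIs
 * and timer, then ask the hypervisor to start it running.
 */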
int __cpuinit __cpu_up(unsigned int cpu)
{
        int rc;

        rc = cpu_up_check(cpu);
        if (rc)
                return rc;

        cpu_initialize_context(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);

        /* This must be done before setting cpu_online_map */
        set_cpu_sibling_map(cpu);
        wmb();

        rc = xen_smp_intr_init(cpu);
        if (rc) {
                remove_siblinginfo(cpu);
                return rc;
        }

        cpu_set(cpu, cpu_online_map);

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        BUG_ON(rc);

        return 0;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
}

#ifndef CONFIG_X86_LOCAL_APIC
int setup_profiling_timer(unsigned int multiplier)
{
        return -EINVAL;
}
#endif