ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c @ 7136:1abbb6448b4a

Remove unused debug code.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Sep 29 18:15:49 2005 +0100 (2005-09-29)
parents ef9591d03fdd
children 61b3b357d827 bf6119a58655 a746126c3bb1
line source
1 /*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Martin J. Bligh : Added support for multi-quad systems
33 * Dave Jones : Report invalid combinations of Athlon CPUs.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
35 */
36 #include <linux/module.h>
37 #include <linux/config.h>
38 #include <linux/init.h>
39 #include <linux/kernel.h>
41 #include <linux/mm.h>
42 #include <linux/sched.h>
43 #include <linux/kernel_stat.h>
44 #include <linux/smp_lock.h>
45 #include <linux/irq.h>
46 #include <linux/bootmem.h>
47 #include <linux/notifier.h>
48 #include <linux/cpu.h>
49 #include <linux/percpu.h>
51 #include <linux/delay.h>
52 #include <linux/mc146818rtc.h>
53 #include <asm/tlbflush.h>
54 #include <asm/desc.h>
55 #include <asm/arch_hooks.h>
57 #include <asm/smp_alt.h>
59 #ifndef CONFIG_X86_IO_APIC
60 #define Dprintk(args...)
61 #endif
62 #include <mach_wakecpu.h>
63 #include <smpboot_hooks.h>
65 #include <asm-xen/evtchn.h>
67 /* Set if we find a B stepping CPU */
68 static int __initdata smp_b_stepping;
70 /* Number of siblings per CPU package */
71 int smp_num_siblings = 1;
72 int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
73 EXPORT_SYMBOL(phys_proc_id);
74 int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
75 EXPORT_SYMBOL(cpu_core_id);
77 /* bitmap of online cpus */
78 cpumask_t cpu_online_map;
80 cpumask_t cpu_callin_map;
81 cpumask_t cpu_callout_map;
82 static cpumask_t smp_commenced_mask;
84 /* Per CPU bogomips and other parameters */
85 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
87 u8 x86_cpu_to_apicid[NR_CPUS] =
88 { [0 ... NR_CPUS-1] = 0xff };
89 EXPORT_SYMBOL(x86_cpu_to_apicid);
91 #if 0
92 /*
93 * Trampoline 80x86 program as an array.
94 */
96 extern unsigned char trampoline_data [];
97 extern unsigned char trampoline_end [];
98 static unsigned char *trampoline_base;
99 static int trampoline_exec;
100 #endif
102 #ifdef CONFIG_HOTPLUG_CPU
103 /* State of each CPU. */
104 DEFINE_PER_CPU(int, cpu_state) = { 0 };
105 #endif
107 static DEFINE_PER_CPU(int, resched_irq);
108 static DEFINE_PER_CPU(int, callfunc_irq);
109 static char resched_name[NR_CPUS][15];
110 static char callfunc_name[NR_CPUS][15];
112 #if 0
113 /*
114 * Currently trivial. Write the real->protected mode
115 * bootstrap into the page concerned. The caller
116 * has made sure it's suitably aligned.
117 */
119 static unsigned long __init setup_trampoline(void)
120 {
121 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
122 return virt_to_phys(trampoline_base);
123 }
124 #endif
126 static void map_cpu_to_logical_apicid(void);
128 /*
129 * We are called very early to get the low memory for the
130 * SMP bootup trampoline page.
131 */
132 void __init smp_alloc_memory(void)
133 {
134 #if 0
135 trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
136 /*
137 * Has to be in very low memory so we can execute
138 * real-mode AP code.
139 */
140 if (__pa(trampoline_base) >= 0x9F000)
141 BUG();
142 /*
143 * Make the SMP trampoline executable:
144 */
145 trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
146 #endif
147 }
149 /*
150 * The bootstrap kernel entry code has set these up. Save them for
151 * a given CPU
152 */
154 static void __init smp_store_cpu_info(int id)
155 {
156 struct cpuinfo_x86 *c = cpu_data + id;
158 *c = boot_cpu_data;
159 if (id!=0)
160 identify_cpu(c);
161 /*
162 * Mask B, Pentium, but not Pentium MMX
163 */
164 if (c->x86_vendor == X86_VENDOR_INTEL &&
165 c->x86 == 5 &&
166 c->x86_mask >= 1 && c->x86_mask <= 4 &&
167 c->x86_model <= 3)
168 /*
169 * Remember we have B step Pentia with bugs
170 */
171 smp_b_stepping = 1;
173 /*
174 * Certain Athlons might work (for various values of 'work') in SMP
175 * but they are not certified as MP capable.
176 */
177 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
179 /* Athlon 660/661 is valid. */
180 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
181 goto valid_k7;
183 /* Duron 670 is valid */
184 if ((c->x86_model==7) && (c->x86_mask==0))
185 goto valid_k7;
187 /*
188 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
189 * It's worth noting that the A5 stepping (662) of some Athlon XP's
190 * have the MP bit set.
191 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
192 */
193 if (((c->x86_model==6) && (c->x86_mask>=2)) ||
194 ((c->x86_model==7) && (c->x86_mask>=1)) ||
195 (c->x86_model> 7))
196 if (cpu_has_mp)
197 goto valid_k7;
199 /* If we get here, it's not a certified SMP capable AMD system. */
200 tainted |= TAINT_UNSAFE_SMP;
201 }
203 valid_k7:
204 ;
205 }
207 #if 0
208 /*
209 * TSC synchronization.
210 *
211 * We first check whether all CPUs have their TSC's synchronized,
212 * then we print a warning if not, and always resync.
213 */
215 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
216 static atomic_t tsc_count_start = ATOMIC_INIT(0);
217 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
218 static unsigned long long tsc_values[NR_CPUS];
220 #define NR_LOOPS 5
222 static void __init synchronize_tsc_bp (void)
223 {
224 int i;
225 unsigned long long t0;
226 unsigned long long sum, avg;
227 long long delta;
228 unsigned long one_usec;
229 int buggy = 0;
231 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
233 /* convert from kcyc/sec to cyc/usec */
234 one_usec = cpu_khz / 1000;
236 atomic_set(&tsc_start_flag, 1);
237 wmb();
239 /*
240 * We loop a few times to get a primed instruction cache,
241 * then the last pass is more or less synchronized and
242 * the BP and APs set their cycle counters to zero all at
243 * once. This reduces the chance of having random offsets
244 * between the processors, and guarantees that the maximum
245 * delay between the cycle counters is never bigger than
246 * the latency of information-passing (cachelines) between
247 * two CPUs.
248 */
249 for (i = 0; i < NR_LOOPS; i++) {
250 /*
251 * all APs synchronize but they loop on '== num_cpus'
252 */
253 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
254 mb();
255 atomic_set(&tsc_count_stop, 0);
256 wmb();
257 /*
258 * this lets the APs save their current TSC:
259 */
260 atomic_inc(&tsc_count_start);
262 rdtscll(tsc_values[smp_processor_id()]);
263 /*
264 * We clear the TSC in the last loop:
265 */
266 if (i == NR_LOOPS-1)
267 write_tsc(0, 0);
269 /*
270 * Wait for all APs to leave the synchronization point:
271 */
272 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
273 mb();
274 atomic_set(&tsc_count_start, 0);
275 wmb();
276 atomic_inc(&tsc_count_stop);
277 }
279 sum = 0;
280 for (i = 0; i < NR_CPUS; i++) {
281 if (cpu_isset(i, cpu_callout_map)) {
282 t0 = tsc_values[i];
283 sum += t0;
284 }
285 }
286 avg = sum;
287 do_div(avg, num_booting_cpus());
289 sum = 0;
290 for (i = 0; i < NR_CPUS; i++) {
291 if (!cpu_isset(i, cpu_callout_map))
292 continue;
293 delta = tsc_values[i] - avg;
294 if (delta < 0)
295 delta = -delta;
296 /*
297 * We report bigger than 2 microseconds clock differences.
298 */
299 if (delta > 2*one_usec) {
300 long realdelta;
301 if (!buggy) {
302 buggy = 1;
303 printk("\n");
304 }
305 realdelta = delta;
306 do_div(realdelta, one_usec);
307 if (tsc_values[i] < avg)
308 realdelta = -realdelta;
310 printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
311 }
313 sum += delta;
314 }
315 if (!buggy)
316 printk("passed.\n");
317 }
319 static void __init synchronize_tsc_ap (void)
320 {
321 int i;
323 /*
324 * Not every cpu is online at the time
325 * this gets called, so we first wait for the BP to
326 * finish SMP initialization:
327 */
328 while (!atomic_read(&tsc_start_flag)) mb();
330 for (i = 0; i < NR_LOOPS; i++) {
331 atomic_inc(&tsc_count_start);
332 while (atomic_read(&tsc_count_start) != num_booting_cpus())
333 mb();
335 rdtscll(tsc_values[smp_processor_id()]);
336 if (i == NR_LOOPS-1)
337 write_tsc(0, 0);
339 atomic_inc(&tsc_count_stop);
340 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
341 }
342 }
343 #undef NR_LOOPS
344 #endif
346 extern void calibrate_delay(void);
348 static atomic_t init_deasserted;
350 static void __init smp_callin(void)
351 {
352 int cpuid, phys_id;
353 unsigned long timeout;
355 #if 0
356 /*
357 * If woken up by an INIT in an 82489DX configuration
358 * we may get here before an INIT-deassert IPI reaches
359 * our local APIC. We have to wait for the IPI or we'll
360 * lock up on an APIC access.
361 */
362 wait_for_init_deassert(&init_deasserted);
363 #endif
365 /*
366 * (This works even if the APIC is not enabled.)
367 */
368 phys_id = smp_processor_id();
369 cpuid = smp_processor_id();
370 if (cpu_isset(cpuid, cpu_callin_map)) {
371 printk("huh, phys CPU#%d, CPU#%d already present??\n",
372 phys_id, cpuid);
373 BUG();
374 }
375 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
377 /*
378 * STARTUP IPIs are fragile beasts as they might sometimes
379 * trigger some glue motherboard logic. Complete APIC bus
380 * silence for 1 second, this overestimates the time the
381 * boot CPU is spending to send the up to 2 STARTUP IPIs
382 * by a factor of two. This should be enough.
383 */
385 /*
386 * Waiting 2s total for startup (udelay is not yet working)
387 */
388 timeout = jiffies + 2*HZ;
389 while (time_before(jiffies, timeout)) {
390 /*
391 * Has the boot CPU finished its STARTUP sequence?
392 */
393 if (cpu_isset(cpuid, cpu_callout_map))
394 break;
395 rep_nop();
396 }
398 if (!time_before(jiffies, timeout)) {
399 printk("BUG: CPU%d started up but did not get a callout!\n",
400 cpuid);
401 BUG();
402 }
404 #if 0
405 /*
406 * the boot CPU has finished the init stage and is spinning
407 * on callin_map until we finish. We are free to set up this
408 * CPU, first the APIC. (this is probably redundant on most
409 * boards)
410 */
412 Dprintk("CALLIN, before setup_local_APIC().\n");
413 smp_callin_clear_local_apic();
414 setup_local_APIC();
415 #endif
416 map_cpu_to_logical_apicid();
418 /*
419 * Get our bogomips.
420 */
421 calibrate_delay();
422 Dprintk("Stack at about %p\n",&cpuid);
424 /*
425 * Save our processor parameters
426 */
427 smp_store_cpu_info(cpuid);
429 #if 0
430 disable_APIC_timer();
431 #endif
433 /*
434 * Allow the master to continue.
435 */
436 cpu_set(cpuid, cpu_callin_map);
438 #if 0
439 /*
440 * Synchronize the TSC with the BP
441 */
442 if (cpu_has_tsc && cpu_khz)
443 synchronize_tsc_ap();
444 #endif
445 }
447 static int cpucount;
449 extern void local_setup_timer(void);
451 /*
452 * Activate a secondary processor.
453 */
454 static void __init start_secondary(void *unused)
455 {
456 /*
457 * Don't put anything before smp_callin(); SMP
458 * booting is so fragile that we want to limit the
459 * things done here to the bare minimum.
460 */
461 cpu_init();
462 smp_callin();
463 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
464 rep_nop();
465 local_setup_timer();
466 smp_intr_init();
467 local_irq_enable();
468 /*
469 * low-memory mappings have been cleared, flush them from
470 * the local TLBs too.
471 */
472 local_flush_tlb();
473 cpu_set(smp_processor_id(), cpu_online_map);
475 /* We can take interrupts now: we're officially "up". */
476 local_irq_enable();
478 wmb();
479 cpu_idle();
480 }
482 /*
483 * Everything has been set up for the secondary
484 * CPUs - they just need to reload everything
485 * from the task structure
486 * This function must not return.
487 */
488 void __init initialize_secondary(void)
489 {
490 /*
491 * We don't actually need to load the full TSS,
492 * basically just the stack pointer and the eip.
493 */
495 asm volatile(
496 "movl %0,%%esp\n\t"
497 "jmp *%1"
498 :
499 :"r" (current->thread.esp),"r" (current->thread.eip));
500 }
502 extern struct {
503 void * esp;
504 unsigned short ss;
505 } stack_start;
507 #ifdef CONFIG_NUMA
509 /* which logical CPUs are on which nodes */
510 cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
511 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
512 /* which node each logical CPU is on */
513 int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
514 EXPORT_SYMBOL(cpu_2_node);
516 /* set up a mapping between cpu and node. */
517 static inline void map_cpu_to_node(int cpu, int node)
518 {
519 printk("Mapping cpu %d to node %d\n", cpu, node);
520 cpu_set(cpu, node_2_cpu_mask[node]);
521 cpu_2_node[cpu] = node;
522 }
524 /* undo a mapping between cpu and node. */
525 static inline void unmap_cpu_to_node(int cpu)
526 {
527 int node;
529 printk("Unmapping cpu %d from all nodes\n", cpu);
530 for (node = 0; node < MAX_NUMNODES; node ++)
531 cpu_clear(cpu, node_2_cpu_mask[node]);
532 cpu_2_node[cpu] = 0;
533 }
534 #else /* !CONFIG_NUMA */
536 #define map_cpu_to_node(cpu, node) ({})
537 #define unmap_cpu_to_node(cpu) ({})
539 #endif /* CONFIG_NUMA */
541 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
543 static void map_cpu_to_logical_apicid(void)
544 {
545 int cpu = smp_processor_id();
546 int apicid = smp_processor_id();
548 cpu_2_logical_apicid[cpu] = apicid;
549 map_cpu_to_node(cpu, apicid_to_node(apicid));
550 }
552 static void unmap_cpu_to_logical_apicid(int cpu)
553 {
554 cpu_2_logical_apicid[cpu] = BAD_APICID;
555 unmap_cpu_to_node(cpu);
556 }
558 #if APIC_DEBUG
559 static inline void __inquire_remote_apic(int apicid)
560 {
561 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
562 char *names[] = { "ID", "VERSION", "SPIV" };
563 int timeout, status;
565 printk("Inquiring remote APIC #%d...\n", apicid);
567 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
568 printk("... APIC #%d %s: ", apicid, names[i]);
570 /*
571 * Wait for idle.
572 */
573 apic_wait_icr_idle();
575 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
576 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
578 timeout = 0;
579 do {
580 udelay(100);
581 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
582 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
584 switch (status) {
585 case APIC_ICR_RR_VALID:
586 status = apic_read(APIC_RRR);
587 printk("%08x\n", status);
588 break;
589 default:
590 printk("failed\n");
591 }
592 }
593 }
594 #endif
596 #if 0
597 #ifdef WAKE_SECONDARY_VIA_NMI
598 /*
599 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
600 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
601 * won't ... remember to clear down the APIC, etc later.
602 */
603 static int __init
604 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
605 {
606 unsigned long send_status = 0, accept_status = 0;
607 int timeout, maxlvt;
609 /* Target chip */
610 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
612 /* Boot on the stack */
613 /* Kick the second */
614 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
616 Dprintk("Waiting for send to finish...\n");
617 timeout = 0;
618 do {
619 Dprintk("+");
620 udelay(100);
621 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
622 } while (send_status && (timeout++ < 1000));
624 /*
625 * Give the other CPU some time to accept the IPI.
626 */
627 udelay(200);
628 /*
629 * Due to the Pentium erratum 3AP.
630 */
631 maxlvt = get_maxlvt();
632 if (maxlvt > 3) {
633 apic_read_around(APIC_SPIV);
634 apic_write(APIC_ESR, 0);
635 }
636 accept_status = (apic_read(APIC_ESR) & 0xEF);
637 Dprintk("NMI sent.\n");
639 if (send_status)
640 printk("APIC never delivered???\n");
641 if (accept_status)
642 printk("APIC delivery error (%lx).\n", accept_status);
644 return (send_status | accept_status);
645 }
646 #endif /* WAKE_SECONDARY_VIA_NMI */
648 #ifdef WAKE_SECONDARY_VIA_INIT
649 static int __init
650 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
651 {
652 unsigned long send_status = 0, accept_status = 0;
653 int maxlvt, timeout, num_starts, j;
655 /*
656 * Be paranoid about clearing APIC errors.
657 */
658 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
659 apic_read_around(APIC_SPIV);
660 apic_write(APIC_ESR, 0);
661 apic_read(APIC_ESR);
662 }
664 Dprintk("Asserting INIT.\n");
666 /*
667 * Turn INIT on target chip
668 */
669 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
671 /*
672 * Send IPI
673 */
674 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
675 | APIC_DM_INIT);
677 Dprintk("Waiting for send to finish...\n");
678 timeout = 0;
679 do {
680 Dprintk("+");
681 udelay(100);
682 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
683 } while (send_status && (timeout++ < 1000));
685 mdelay(10);
687 Dprintk("Deasserting INIT.\n");
689 /* Target chip */
690 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
692 /* Send IPI */
693 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
695 Dprintk("Waiting for send to finish...\n");
696 timeout = 0;
697 do {
698 Dprintk("+");
699 udelay(100);
700 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
701 } while (send_status && (timeout++ < 1000));
703 atomic_set(&init_deasserted, 1);
705 /*
706 * Should we send STARTUP IPIs ?
707 *
708 * Determine this based on the APIC version.
709 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
710 */
711 if (APIC_INTEGRATED(apic_version[phys_apicid]))
712 num_starts = 2;
713 else
714 num_starts = 0;
716 /*
717 * Run STARTUP IPI loop.
718 */
719 Dprintk("#startup loops: %d.\n", num_starts);
721 maxlvt = get_maxlvt();
723 for (j = 1; j <= num_starts; j++) {
724 Dprintk("Sending STARTUP #%d.\n",j);
725 apic_read_around(APIC_SPIV);
726 apic_write(APIC_ESR, 0);
727 apic_read(APIC_ESR);
728 Dprintk("After apic_write.\n");
730 /*
731 * STARTUP IPI
732 */
734 /* Target chip */
735 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
737 /* Boot on the stack */
738 /* Kick the second */
739 apic_write_around(APIC_ICR, APIC_DM_STARTUP
740 | (start_eip >> 12));
742 /*
743 * Give the other CPU some time to accept the IPI.
744 */
745 udelay(300);
747 Dprintk("Startup point 1.\n");
749 Dprintk("Waiting for send to finish...\n");
750 timeout = 0;
751 do {
752 Dprintk("+");
753 udelay(100);
754 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
755 } while (send_status && (timeout++ < 1000));
757 /*
758 * Give the other CPU some time to accept the IPI.
759 */
760 udelay(200);
761 /*
762 * Due to the Pentium erratum 3AP.
763 */
764 if (maxlvt > 3) {
765 apic_read_around(APIC_SPIV);
766 apic_write(APIC_ESR, 0);
767 }
768 accept_status = (apic_read(APIC_ESR) & 0xEF);
769 if (send_status || accept_status)
770 break;
771 }
772 Dprintk("After Startup.\n");
774 if (send_status)
775 printk("APIC never delivered???\n");
776 if (accept_status)
777 printk("APIC delivery error (%lx).\n", accept_status);
779 return (send_status | accept_status);
780 }
781 #endif /* WAKE_SECONDARY_VIA_INIT */
782 #endif
784 extern cpumask_t cpu_initialized;
786 static int __init do_boot_cpu(int apicid)
787 /*
788 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
789 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
790 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
791 */
792 {
793 struct task_struct *idle;
794 unsigned long boot_error;
795 int timeout, cpu;
796 unsigned long start_eip;
797 #if 0
798 unsigned short nmi_high = 0, nmi_low = 0;
799 #endif
800 vcpu_guest_context_t ctxt;
801 extern void startup_32_smp(void);
802 extern void hypervisor_callback(void);
803 extern void failsafe_callback(void);
804 extern void smp_trap_init(trap_info_t *);
805 int i;
807 cpu = ++cpucount;
808 /*
809 * We can't use kernel_thread() since we must avoid
810 * rescheduling the child.
811 */
812 idle = fork_idle(cpu);
813 if (IS_ERR(idle))
814 panic("failed fork for CPU %d", cpu);
815 idle->thread.eip = (unsigned long) start_secondary;
816 /* start_eip had better be page-aligned! */
817 start_eip = (unsigned long)startup_32_smp;
819 /* So we see what's up */
820 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
821 /* Stack for startup_32 can be just as for start_secondary onwards */
822 stack_start.esp = (void *) idle->thread.esp;
824 irq_ctx_init(cpu);
826 /*
827 * This grunge runs the startup process for
828 * the targeted processor.
829 */
831 atomic_set(&init_deasserted, 0);
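/*
 * Xen note: instead of copying a real-mode trampoline and sending
 * INIT/STARTUP IPIs (the disabled #else path further below), the new CPU is
 * described by a vcpu_guest_context_t (registers, GDT frames, ring-1 stack,
 * callbacks, cr3) and started with HYPERVISOR_boot_vcpu().
 */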
833 #if 1
834 cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
835 BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
836 cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
837 memcpy((void *)cpu_gdt_descr[cpu].address,
838 (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
840 memset(&ctxt, 0, sizeof(ctxt));
842 ctxt.user_regs.ds = __USER_DS;
843 ctxt.user_regs.es = __USER_DS;
844 ctxt.user_regs.fs = 0;
845 ctxt.user_regs.gs = 0;
846 ctxt.user_regs.ss = __KERNEL_DS;
847 ctxt.user_regs.cs = __KERNEL_CS;
848 ctxt.user_regs.eip = start_eip;
849 ctxt.user_regs.esp = idle->thread.esp;
850 #define X86_EFLAGS_IOPL_RING1 0x1000
851 ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
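/* The i386 Xen guest kernel runs in ring 1, so the vcpu is started with
   interrupts enabled and an I/O privilege level of 1. */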
853 /* FPU is set up to default initial state. */
854 memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
856 /* Virtual IDT is empty at start-of-day. */
857 for ( i = 0; i < 256; i++ )
858 {
859 ctxt.trap_ctxt[i].vector = i;
860 ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
861 }
862 smp_trap_init(ctxt.trap_ctxt);
864 /* No LDT. */
865 ctxt.ldt_ents = 0;
867 {
868 unsigned long va;
869 int f;
871 for (va = cpu_gdt_descr[cpu].address, f = 0;
872 va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
873 va += PAGE_SIZE, f++) {
874 ctxt.gdt_frames[f] = virt_to_mfn(va);
875 make_page_readonly((void *)va);
876 }
877 ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
878 }
880 /* Ring 1 stack is the initial stack. */
881 ctxt.kernel_ss = __KERNEL_DS;
882 ctxt.kernel_sp = idle->thread.esp;
884 /* Callback handlers. */
885 ctxt.event_callback_cs = __KERNEL_CS;
886 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
887 ctxt.failsafe_callback_cs = __KERNEL_CS;
888 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
890 ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
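/* The secondary vcpu starts on the boot page tables: cr3 is loaded with the
   machine address of swapper_pg_dir. */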
892 boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
893 if (boot_error)
894 printk("boot error: %ld\n", boot_error);
896 if (!boot_error) {
897 /*
898 * allow APs to start initializing.
899 */
900 Dprintk("Before Callout %d.\n", cpu);
901 cpu_set(cpu, cpu_callout_map);
902 Dprintk("After Callout %d.\n", cpu);
904 /*
905 * Wait 5s total for a response
906 */
907 for (timeout = 0; timeout < 50000; timeout++) {
908 if (cpu_isset(cpu, cpu_callin_map))
909 break; /* It has booted */
910 udelay(100);
911 }
913 if (cpu_isset(cpu, cpu_callin_map)) {
914 /* number CPUs logically, starting from 1 (BSP is 0) */
915 Dprintk("OK.\n");
916 printk("CPU%d: ", cpu);
917 print_cpu_info(&cpu_data[cpu]);
918 Dprintk("CPU has booted.\n");
919 } else {
920 boot_error= 1;
921 }
922 }
923 x86_cpu_to_apicid[cpu] = apicid;
924 if (boot_error) {
925 /* Try to put things back the way they were before ... */
926 unmap_cpu_to_logical_apicid(cpu);
927 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
928 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
929 cpucount--;
930 }
932 #else
933 Dprintk("Setting warm reset code and vector.\n");
935 store_NMI_vector(&nmi_high, &nmi_low);
937 smpboot_setup_warm_reset_vector(start_eip);
939 /*
940 * Starting actual IPI sequence...
941 */
942 boot_error = wakeup_secondary_cpu(apicid, start_eip);
944 if (!boot_error) {
945 /*
946 * allow APs to start initializing.
947 */
948 Dprintk("Before Callout %d.\n", cpu);
949 cpu_set(cpu, cpu_callout_map);
950 Dprintk("After Callout %d.\n", cpu);
952 /*
953 * Wait 5s total for a response
954 */
955 for (timeout = 0; timeout < 50000; timeout++) {
956 if (cpu_isset(cpu, cpu_callin_map))
957 break; /* It has booted */
958 udelay(100);
959 }
961 if (cpu_isset(cpu, cpu_callin_map)) {
962 /* number CPUs logically, starting from 1 (BSP is 0) */
963 Dprintk("OK.\n");
964 printk("CPU%d: ", cpu);
965 print_cpu_info(&cpu_data[cpu]);
966 Dprintk("CPU has booted.\n");
967 } else {
968 boot_error= 1;
969 if (*((volatile unsigned char *)trampoline_base)
970 == 0xA5)
971 /* trampoline started but...? */
972 printk("Stuck ??\n");
973 else
974 /* trampoline code not run */
975 printk("Not responding.\n");
976 inquire_remote_apic(apicid);
977 }
978 }
979 x86_cpu_to_apicid[cpu] = apicid;
980 if (boot_error) {
981 /* Try to put things back the way they were before ... */
982 unmap_cpu_to_logical_apicid(cpu);
983 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
984 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
985 cpucount--;
986 }
988 /* mark "stuck" area as not stuck */
989 *((volatile unsigned long *)trampoline_base) = 0;
990 #endif
992 return boot_error;
993 }
995 static void smp_tune_scheduling (void)
996 {
997 unsigned long cachesize; /* kB */
998 unsigned long bandwidth = 350; /* MB/s */
999 /*
1000 * Rough estimation for SMP scheduling, this is the number of
1001 * cycles it takes for a fully memory-limited process to flush
1002 * the SMP-local cache.
1004 * (For a P5 this pretty much means we will choose another idle
1005 * CPU almost always at wakeup time (this is due to the small
1006 * L1 cache), on PIIs it's around 50-100 usecs, depending on
1007 * the cache size)
1008 */
1010 if (!cpu_khz) {
1011 /*
1012 * this basically disables processor-affinity
1013 * scheduling on SMP without a TSC.
1014 */
1015 return;
1016 } else {
1017 cachesize = boot_cpu_data.x86_cache_size;
1018 if (cachesize == -1) {
1019 cachesize = 16; /* Pentiums, 2x8kB cache */
1020 bandwidth = 100;
1021 }
1022 }
1023 }
1025 /*
1026 * Cycle through the processors sending APIC IPIs to boot each.
1027 */
1029 #if 0
1030 static int boot_cpu_logical_apicid;
1031 #endif
1032 /* Where the IO area was mapped on multiquad, always 0 otherwise */
1033 void *xquad_portio;
1035 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
1036 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
1037 EXPORT_SYMBOL(cpu_core_map);
1039 static void __init smp_boot_cpus(unsigned int max_cpus)
1040 {
1041 int cpu, kicked;
1042 unsigned long bogosum = 0;
1043 #if 0
1044 int apicid, bit;
1045 #endif
1047 /*
1048 * Setup boot CPU information
1049 */
1050 smp_store_cpu_info(0); /* Final full version of the data */
1051 printk("CPU%d: ", 0);
1052 print_cpu_info(&cpu_data[0]);
1054 #if 0
1055 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
1056 boot_cpu_logical_apicid = logical_smp_processor_id();
1057 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
1058 #else
1059 // boot_cpu_physical_apicid = 0;
1060 // boot_cpu_logical_apicid = 0;
1061 x86_cpu_to_apicid[0] = 0;
1062 #endif
1064 current_thread_info()->cpu = 0;
1065 smp_tune_scheduling();
1066 cpus_clear(cpu_sibling_map[0]);
1067 cpu_set(0, cpu_sibling_map[0]);
1069 cpus_clear(cpu_core_map[0]);
1070 cpu_set(0, cpu_core_map[0]);
1072 #ifdef CONFIG_X86_IO_APIC
1073 /*
1074 * If we couldn't find an SMP configuration at boot time,
1075 * get out of here now!
1076 */
1077 if (!smp_found_config && !acpi_lapic) {
1078 printk(KERN_NOTICE "SMP motherboard not detected.\n");
1079 smpboot_clear_io_apic_irqs();
1080 #if 0
1081 phys_cpu_present_map = physid_mask_of_physid(0);
1082 #endif
1083 #ifdef CONFIG_X86_LOCAL_APIC
1084 if (APIC_init_uniprocessor())
1085 printk(KERN_NOTICE "Local APIC not detected."
1086 " Using dummy APIC emulation.\n");
1087 #endif
1088 map_cpu_to_logical_apicid();
1089 cpu_set(0, cpu_sibling_map[0]);
1090 cpu_set(0, cpu_core_map[0]);
1091 return;
1092 }
1093 #endif
1095 #if 0
1096 /*
1097 * Should not be necessary because the MP table should list the boot
1098 * CPU too, but we do it for the sake of robustness anyway.
1099 * Makes no sense to do this check in clustered apic mode, so skip it
1100 */
1101 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
1102 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
1103 boot_cpu_physical_apicid);
1104 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1105 }
1107 /*
1108 * If we couldn't find a local APIC, then get out of here now!
1109 */
1110 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
1111 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1112 boot_cpu_physical_apicid);
1113 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
1114 smpboot_clear_io_apic_irqs();
1115 phys_cpu_present_map = physid_mask_of_physid(0);
1116 cpu_set(0, cpu_sibling_map[0]);
1117 cpu_set(0, cpu_core_map[0]);
1118 cpu_set(0, cpu_sibling_map[0]);
1119 cpu_set(0, cpu_core_map[0]);
1120 return;
1121 }
1123 verify_local_APIC();
1124 #endif
1126 /*
1127 * If SMP should be disabled, then really disable it!
1128 */
1129 if (!max_cpus) {
1130 HYPERVISOR_shared_info->n_vcpu = 1;
1131 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
1132 smpboot_clear_io_apic_irqs();
1133 #if 0
1134 phys_cpu_present_map = physid_mask_of_physid(0);
1135 #endif
1136 return;
1137 }
1139 smp_intr_init();
1141 #if 0
1142 connect_bsp_APIC();
1143 setup_local_APIC();
1144 #endif
1145 map_cpu_to_logical_apicid();
1146 #if 0
1149 setup_portio_remap();
1151 /*
1152 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
1154 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
1155 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
1156 * clustered apic ID.
1157 */
1158 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
1159 #endif
1160 Dprintk("CPU present map: %lx\n",
1161 (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
1163 kicked = 1;
1164 for (cpu = 1; kicked < NR_CPUS &&
1165 cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
1166 if (max_cpus <= cpucount+1)
1167 continue;
1169 #ifdef CONFIG_SMP_ALTERNATIVES
1170 if (kicked == 1)
1171 prepare_for_smp();
1172 #endif
1173 if (do_boot_cpu(cpu))
1174 printk("CPU #%d not responding - cannot use it.\n",
1175 cpu);
1176 else
1177 ++kicked;
1178 }
1180 #if 0
1181 /*
1182 * Cleanup possible dangling ends...
1183 */
1184 smpboot_restore_warm_reset_vector();
1185 #endif
1187 /*
1188 * Allow the user to impress friends.
1189 */
1190 Dprintk("Before bogomips.\n");
1191 for (cpu = 0; cpu < NR_CPUS; cpu++)
1192 if (cpu_isset(cpu, cpu_callout_map))
1193 bogosum += cpu_data[cpu].loops_per_jiffy;
1194 printk(KERN_INFO
1195 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1196 cpucount+1,
1197 bogosum/(500000/HZ),
1198 (bogosum/(5000/HZ))%100);
1200 Dprintk("Before bogocount - setting activated=1.\n");
1202 if (smp_b_stepping)
1203 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1205 /*
1206 * Don't taint if we are running SMP kernel on a single non-MP
1207 * approved Athlon
1208 */
1209 if (tainted & TAINT_UNSAFE_SMP) {
1210 if (cpucount)
1211 printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
1212 else
1213 tainted &= ~TAINT_UNSAFE_SMP;
1214 }
1216 Dprintk("Boot done.\n");
1218 /*
1219 * construct cpu_sibling_map[], so that we can tell sibling CPUs
1220 * efficiently.
1221 */
1222 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1223 cpus_clear(cpu_sibling_map[cpu]);
1224 cpus_clear(cpu_core_map[cpu]);
1225 }
1227 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1228 struct cpuinfo_x86 *c = cpu_data + cpu;
1229 int siblings = 0;
1230 int i;
1231 if (!cpu_isset(cpu, cpu_callout_map))
1232 continue;
1234 if (smp_num_siblings > 1) {
1235 for (i = 0; i < NR_CPUS; i++) {
1236 if (!cpu_isset(i, cpu_callout_map))
1237 continue;
1238 if (cpu_core_id[cpu] == cpu_core_id[i]) {
1239 siblings++;
1240 cpu_set(i, cpu_sibling_map[cpu]);
1241 }
1242 }
1243 } else {
1244 siblings++;
1245 cpu_set(cpu, cpu_sibling_map[cpu]);
1246 }
1248 if (siblings != smp_num_siblings) {
1249 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1250 smp_num_siblings = siblings;
1251 }
1253 if (c->x86_num_cores > 1) {
1254 for (i = 0; i < NR_CPUS; i++) {
1255 if (!cpu_isset(i, cpu_callout_map))
1256 continue;
1257 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1258 cpu_set(i, cpu_core_map[cpu]);
1259 }
1260 }
1261 } else {
1262 cpu_core_map[cpu] = cpu_sibling_map[cpu];
1263 }
1264 }
1266 smpboot_setup_io_apic();
1268 #if 0
1269 setup_boot_APIC_clock();
1271 /*
1272 * Synchronize the TSC with the AP
1273 */
1274 if (cpu_has_tsc && cpucount && cpu_khz)
1275 synchronize_tsc_bp();
1276 #endif
1277 }
1279 /* These are wrappers to interface to the new boot process. Someone
1280 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1281 void __init smp_prepare_cpus(unsigned int max_cpus)
1282 {
1283 smp_commenced_mask = cpumask_of_cpu(0);
1284 cpu_callin_map = cpumask_of_cpu(0);
1285 mb();
1286 smp_boot_cpus(max_cpus);
1287 }
1289 void __devinit smp_prepare_boot_cpu(void)
1290 {
1291 cpu_set(smp_processor_id(), cpu_online_map);
1292 cpu_set(smp_processor_id(), cpu_callout_map);
1293 }
1295 #ifdef CONFIG_HOTPLUG_CPU
1296 #include <asm-xen/xenbus.h>
1297 /* hotplug down/up function pointer and target vcpu */
1298 struct vcpu_hotplug_handler_t {
1299 void (*fn) (int vcpu);
1300 u32 vcpu;
1301 };
1302 static struct vcpu_hotplug_handler_t vcpu_hotplug_handler;
1304 static int vcpu_hotplug_cpu_process(void *unused)
1305 {
1306 struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
1308 if (handler->fn) {
1309 (*(handler->fn)) (handler->vcpu);
1310 handler->fn = NULL;
1311 }
1312 return 0;
1313 }
1315 static void __vcpu_hotplug_handler(void *unused)
1316 {
1317 int err;
1319 err = kernel_thread(vcpu_hotplug_cpu_process,
1320 NULL, CLONE_FS | CLONE_FILES);
1321 if (err < 0)
1322 printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
1323 }
1325 static void handle_vcpu_hotplug_event(struct xenbus_watch *, const char *);
1326 static struct notifier_block xsn_cpu;
1328 /* xenbus watch struct */
1329 static struct xenbus_watch cpu_watch = {
1330 .node = "cpu",
1331 .callback = handle_vcpu_hotplug_event
1332 };
1334 /* NB: Assumes xenbus_lock is held! */
1335 static int setup_cpu_watcher(struct notifier_block *notifier,
1336 unsigned long event, void *data)
1337 {
1338 int err = 0;
1340 BUG_ON(down_trylock(&xenbus_lock) == 0);
1341 err = register_xenbus_watch(&cpu_watch);
1343 if (err) {
1344 printk("Failed to register watch on /cpu\n");
1345 }
1347 return NOTIFY_DONE;
1348 }
1350 static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, const char *node)
1351 {
1352 static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
1353 struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
1354 ssize_t ret;
1355 int err, cpu;
1356 char state[8];
1357 char dir[32];
1358 char *cpustr;
1360 /* get a pointer to start of cpu string */
1361 if ((cpustr = strstr(node, "cpu/")) != NULL) {
1363 /* find which cpu state changed, note vcpu for handler */
1364 sscanf(cpustr, "cpu/%d", &cpu);
1365 handler->vcpu = cpu;
1367 /* calc the dir for xenbus read */
1368 sprintf(dir, "cpu/%d", cpu);
1370 /* make sure the watch that was triggered is a change to the correct key */
1371 if ((strcmp(node + strlen(dir), "/availability")) != 0)
1372 return;
1374 /* get the state value */
1375 err = xenbus_scanf(dir, "availability", "%s", state);
1377 if (err != 1) {
1378 printk(KERN_ERR
1379 "XENBUS: Unable to read cpu state\n");
1380 return;
1381 }
1383 /* if we detect a state change, take action */
1384 if (strcmp(state, "online") == 0) {
1385 /* offline -> online */
1386 if (!cpu_isset(cpu, cpu_online_map)) {
1387 handler->fn = (void *)&cpu_up;
1388 ret = schedule_work(&vcpu_hotplug_work);
1389 }
1390 } else if (strcmp(state, "offline") == 0) {
1391 /* online -> offline */
1392 if (cpu_isset(cpu, cpu_online_map)) {
1393 handler->fn = (void *)&cpu_down;
1394 ret = schedule_work(&vcpu_hotplug_work);
1395 }
1396 } else {
1397 printk(KERN_ERR
1398 "XENBUS: unknown state(%s) on node(%s)\n", state,
1399 node);
1400 }
1401 }
1402 return;
1403 }
1405 static int __init setup_vcpu_hotplug_event(void)
1406 {
1407 xsn_cpu.notifier_call = setup_cpu_watcher;
1409 register_xenstore_notifier(&xsn_cpu);
1411 return 0;
1412 }
1414 subsys_initcall(setup_vcpu_hotplug_event);
1416 /* must be called with the cpucontrol mutex held */
1417 static int __devinit cpu_enable(unsigned int cpu)
1418 {
1419 #ifdef CONFIG_SMP_ALTERNATIVES
1420 if (num_online_cpus() == 1)
1421 prepare_for_smp();
1422 #endif
1424 /* get the target out of its holding state */
1425 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1426 wmb();
1428 /* wait for the processor to ack it. timeout? */
1429 while (!cpu_online(cpu))
1430 cpu_relax();
1432 fixup_irqs(cpu_online_map);
1434 /* counter the disable in fixup_irqs() */
1435 local_irq_enable();
1436 return 0;
1437 }
1439 int __cpu_disable(void)
1440 {
1441 cpumask_t map = cpu_online_map;
1442 int cpu = smp_processor_id();
1444 /*
1445 * Perhaps use cpufreq to drop frequency, but that could go
1446 * into generic code.
1448 * We won't take down the boot processor on i386 due to some
1449 * interrupts only being able to be serviced by the BSP.
1450 * Especially so if we're not using an IOAPIC -zwane
1451 */
1452 if (cpu == 0)
1453 return -EBUSY;
1455 cpu_clear(cpu, map);
1456 fixup_irqs(map);
1458 /* It's now safe to remove this processor from the online map */
1459 cpu_clear(cpu, cpu_online_map);
1461 #ifdef CONFIG_SMP_ALTERNATIVES
1462 if (num_online_cpus() == 1)
1463 unprepare_for_smp();
1464 #endif
1466 return 0;
1467 }
1469 void __cpu_die(unsigned int cpu)
1470 {
1471 /* We don't do anything here: idle task is faking death itself. */
1472 unsigned int i;
1474 for (i = 0; i < 10; i++) {
1475 /* They ack this in play_dead by setting CPU_DEAD */
1476 if (per_cpu(cpu_state, cpu) == CPU_DEAD)
1477 return;
1478 current->state = TASK_UNINTERRUPTIBLE;
1479 schedule_timeout(HZ/10);
1480 }
1481 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1482 }
1484 #else /* ... !CONFIG_HOTPLUG_CPU */
1485 int __cpu_disable(void)
1486 {
1487 return -ENOSYS;
1488 }
1490 void __cpu_die(unsigned int cpu)
1491 {
1492 /* We said "no" in __cpu_disable */
1493 BUG();
1494 }
1495 #endif /* CONFIG_HOTPLUG_CPU */
1497 int __devinit __cpu_up(unsigned int cpu)
1498 {
1499 /* In case one didn't come up */
1500 if (!cpu_isset(cpu, cpu_callin_map)) {
1501 printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
1502 local_irq_enable();
1503 return -EIO;
1504 }
1506 #ifdef CONFIG_HOTPLUG_CPU
1507 #ifdef CONFIG_XEN
1508 /* Tell hypervisor to bring vcpu up. */
1509 HYPERVISOR_vcpu_up(cpu);
1510 #endif
1511 /* Already up, and in cpu_quiescent now? */
1512 if (cpu_isset(cpu, smp_commenced_mask)) {
1513 cpu_enable(cpu);
1514 return 0;
1515 }
1516 #endif
1518 local_irq_enable();
1519 /* Unleash the CPU! */
1520 cpu_set(cpu, smp_commenced_mask);
1521 while (!cpu_isset(cpu, cpu_online_map))
1522 mb();
1523 return 0;
1524 }
1526 void __init smp_cpus_done(unsigned int max_cpus)
1527 {
1528 #if 1
1529 #else
1530 #ifdef CONFIG_X86_IO_APIC
1531 setup_ioapic_dest();
1532 #endif
1533 zap_low_mappings();
1534 /*
1535 * Disable executability of the SMP trampoline:
1536 */
1537 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1538 #endif
1539 }
1541 extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
1542 extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
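/*
 * Under Xen, the reschedule and call-function IPIs are delivered through
 * per-CPU event channels: each CPU binds them to dynamic irqs and installs
 * the interrupt handlers here.
 */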
1544 void smp_intr_init(void)
1545 {
1546 int cpu = smp_processor_id();
1548 per_cpu(resched_irq, cpu) =
1549 bind_ipi_to_irq(RESCHEDULE_VECTOR);
1550 sprintf(resched_name[cpu], "resched%d", cpu);
1551 BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
1552 SA_INTERRUPT, resched_name[cpu], NULL));
1554 per_cpu(callfunc_irq, cpu) =
1555 bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
1556 sprintf(callfunc_name[cpu], "callfunc%d", cpu);
1557 BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
1558 smp_call_function_interrupt,
1559 SA_INTERRUPT, callfunc_name[cpu], NULL));
1560 }
1562 static void smp_intr_exit(void)
1563 {
1564 int cpu = smp_processor_id();
1566 free_irq(per_cpu(resched_irq, cpu), NULL);
1567 unbind_ipi_from_irq(RESCHEDULE_VECTOR);
1569 free_irq(per_cpu(callfunc_irq, cpu), NULL);
1570 unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
1571 }
1573 extern void local_setup_timer_irq(void);
1574 extern void local_teardown_timer_irq(void);
1576 void smp_suspend(void)
1577 {
1578 local_teardown_timer_irq();
1579 smp_intr_exit();
1580 }
1582 void smp_resume(void)
1583 {
1584 smp_intr_init();
1585 local_setup_timer_irq();
1586 }
1588 static atomic_t vcpus_rebooting;
1590 static void restore_vcpu_ready(void)
1591 {
1593 atomic_dec(&vcpus_rebooting);
1594 }
1596 void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
1597 {
1598 int r;
1599 int gdt_pages;
1600 r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
1601 if (r != 0)
1602 panic("pickling vcpu %d -> %d!\n", vcpu, r);
1604 /* Translate from machine to physical addresses where necessary,
1605 so that they can be translated to our new machine address space
1606 after resume. libxc is responsible for doing this to vcpu0,
1607 but we do it to the others. */
1608 gdt_pages = (ctxt->gdt_ents + 511) / 512;
1609 ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
1610 for (r = 0; r < gdt_pages; r++)
1611 ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
1612 }
1614 int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
1615 {
1616 int r;
1617 int gdt_pages = (ctxt->gdt_ents + 511) / 512;
1619 /* This is kind of a hack, and implicitly relies on the fact that
1620 the vcpu stops in a place where all of the call clobbered
1621 registers are already dead. */
1622 ctxt->user_regs.esp -= 4;
1623 ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
1624 ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready;
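/* The saved eip was pushed as a return address above, so when
   restore_vcpu_ready() returns, the vcpu resumes where it was pickled. */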
1626 /* De-canonicalise. libxc handles this for vcpu 0, but we need
1627 to do it for the other vcpus. */
1628 ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
1629 for (r = 0; r < gdt_pages; r++)
1630 ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
1632 atomic_set(&vcpus_rebooting, 1);
1633 r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
1634 if (r != 0) {
1635 printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
1636 return -1;
1637 }
1639 /* Make sure we wait for the new vcpu to come up before trying to do
1640 anything with it or starting the next one. */
1641 while (atomic_read(&vcpus_rebooting))
1642 barrier();
1644 return 0;
1645 }