ia64/xen-unstable: linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c @ 6328:d8fd24b43080

No 'boot error' message if booting secondary vcpu succeeds.

author   kaf24@firebug.cl.cam.ac.uk
date     Mon Aug 22 10:18:14 2005 +0000 (2005-08-22)
parents  1a0723cd37f1
children 531ad4bde8f2
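Judging from the summary above and the code in this revision, the change appears to be that do_boot_cpu() now prints its "boot error" message only when HYPERVISOR_boot_vcpu() actually returns an error (see source lines 925-927 below). A minimal sketch of that guarded error path, using the names from the file:

    boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
    if (boot_error)
        printk("boot error: %ld\n", boot_error);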
line source
1 /*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Martin J. Bligh : Added support for multi-quad systems
33 * Dave Jones : Report invalid combinations of Athlon CPUs.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process. */
36 #include <linux/module.h>
37 #include <linux/config.h>
38 #include <linux/init.h>
39 #include <linux/kernel.h>
41 #include <linux/mm.h>
42 #include <linux/sched.h>
43 #include <linux/kernel_stat.h>
44 #include <linux/smp_lock.h>
45 #include <linux/irq.h>
46 #include <linux/bootmem.h>
47 #include <linux/notifier.h>
48 #include <linux/cpu.h>
49 #include <linux/percpu.h>
51 #include <linux/delay.h>
52 #include <linux/mc146818rtc.h>
53 #include <asm/tlbflush.h>
54 #include <asm/desc.h>
55 #include <asm/arch_hooks.h>
57 #include <asm/smp_alt.h>
59 #ifndef CONFIG_X86_IO_APIC
60 #define Dprintk(args...)
61 #endif
62 #include <mach_wakecpu.h>
63 #include <smpboot_hooks.h>
65 #include <asm-xen/evtchn.h>
67 /* Set if we find a B stepping CPU */
68 static int __initdata smp_b_stepping;
70 /* Number of siblings per CPU package */
71 int smp_num_siblings = 1;
72 int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
73 EXPORT_SYMBOL(phys_proc_id);
74 int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
75 EXPORT_SYMBOL(cpu_core_id);
77 /* bitmap of online cpus */
78 cpumask_t cpu_online_map;
80 cpumask_t cpu_callin_map;
81 cpumask_t cpu_callout_map;
82 static cpumask_t smp_commenced_mask;
84 /* Per CPU bogomips and other parameters */
85 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
87 u8 x86_cpu_to_apicid[NR_CPUS] =
88 { [0 ... NR_CPUS-1] = 0xff };
89 EXPORT_SYMBOL(x86_cpu_to_apicid);
91 #if 0
92 /*
93 * Trampoline 80x86 program as an array.
94 */
96 extern unsigned char trampoline_data [];
97 extern unsigned char trampoline_end [];
98 static unsigned char *trampoline_base;
99 static int trampoline_exec;
100 #endif
102 #ifdef CONFIG_HOTPLUG_CPU
103 /* State of each CPU. */
104 DEFINE_PER_CPU(int, cpu_state) = { 0 };
105 #endif
107 static DEFINE_PER_CPU(int, resched_irq);
108 static DEFINE_PER_CPU(int, callfunc_irq);
109 static char resched_name[NR_CPUS][15];
110 static char callfunc_name[NR_CPUS][15];
112 #if 0
113 /*
114 * Currently trivial. Write the real->protected mode
115 * bootstrap into the page concerned. The caller
116 * has made sure it's suitably aligned.
117 */
119 static unsigned long __init setup_trampoline(void)
120 {
121 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
122 return virt_to_phys(trampoline_base);
123 }
124 #endif
126 static void map_cpu_to_logical_apicid(void);
128 /*
129 * We are called very early to get the low memory for the
130 * SMP bootup trampoline page.
131 */
132 void __init smp_alloc_memory(void)
133 {
134 #if 1
135 int cpu;
137 for (cpu = 1; cpu < NR_CPUS; cpu++) {
138 cpu_gdt_descr[cpu].address = (unsigned long)
139 alloc_bootmem_low_pages(PAGE_SIZE);
140 /* XXX free unused pages later */
141 }
142 #else
143 trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
144 /*
145 * Has to be in very low memory so we can execute
146 * real-mode AP code.
147 */
148 if (__pa(trampoline_base) >= 0x9F000)
149 BUG();
150 /*
151 * Make the SMP trampoline executable:
152 */
153 trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
154 #endif
155 }
157 /*
158 * The bootstrap kernel entry code has set these up. Save them for
159 * a given CPU
160 */
162 static void __init smp_store_cpu_info(int id)
163 {
164 struct cpuinfo_x86 *c = cpu_data + id;
166 *c = boot_cpu_data;
167 if (id!=0)
168 identify_cpu(c);
169 /*
170 * Mask B, Pentium, but not Pentium MMX
171 */
172 if (c->x86_vendor == X86_VENDOR_INTEL &&
173 c->x86 == 5 &&
174 c->x86_mask >= 1 && c->x86_mask <= 4 &&
175 c->x86_model <= 3)
176 /*
177 * Remember we have B step Pentia with bugs
178 */
179 smp_b_stepping = 1;
181 /*
182 * Certain Athlons might work (for various values of 'work') in SMP
183 * but they are not certified as MP capable.
184 */
185 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
187 /* Athlon 660/661 is valid. */
188 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
189 goto valid_k7;
191 /* Duron 670 is valid */
192 if ((c->x86_model==7) && (c->x86_mask==0))
193 goto valid_k7;
195 /*
196 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
197 * It's worth noting that the A5 stepping (662) of some Athlon XP's
198 * have the MP bit set.
199 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
200 */
201 if (((c->x86_model==6) && (c->x86_mask>=2)) ||
202 ((c->x86_model==7) && (c->x86_mask>=1)) ||
203 (c->x86_model> 7))
204 if (cpu_has_mp)
205 goto valid_k7;
207 /* If we get here, it's not a certified SMP capable AMD system. */
208 tainted |= TAINT_UNSAFE_SMP;
209 }
211 valid_k7:
212 ;
213 }
215 #if 0
216 /*
217 * TSC synchronization.
218 *
219 * We first check whether all CPUs have their TSC's synchronized,
220 * then we print a warning if not, and always resync.
221 */
223 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
224 static atomic_t tsc_count_start = ATOMIC_INIT(0);
225 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
226 static unsigned long long tsc_values[NR_CPUS];
228 #define NR_LOOPS 5
230 static void __init synchronize_tsc_bp (void)
231 {
232 int i;
233 unsigned long long t0;
234 unsigned long long sum, avg;
235 long long delta;
236 unsigned long one_usec;
237 int buggy = 0;
239 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
241 /* convert from kcyc/sec to cyc/usec */
242 one_usec = cpu_khz / 1000;
244 atomic_set(&tsc_start_flag, 1);
245 wmb();
247 /*
248 * We loop a few times to get a primed instruction cache,
249 * then the last pass is more or less synchronized and
250 * the BP and APs set their cycle counters to zero all at
251 * once. This reduces the chance of having random offsets
252 * between the processors, and guarantees that the maximum
253 * delay between the cycle counters is never bigger than
254 * the latency of information-passing (cachelines) between
255 * two CPUs.
256 */
257 for (i = 0; i < NR_LOOPS; i++) {
258 /*
259 * all APs synchronize but they loop on '== num_cpus'
260 */
261 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
262 mb();
263 atomic_set(&tsc_count_stop, 0);
264 wmb();
265 /*
266 * this lets the APs save their current TSC:
267 */
268 atomic_inc(&tsc_count_start);
270 rdtscll(tsc_values[smp_processor_id()]);
271 /*
272 * We clear the TSC in the last loop:
273 */
274 if (i == NR_LOOPS-1)
275 write_tsc(0, 0);
277 /*
278 * Wait for all APs to leave the synchronization point:
279 */
280 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
281 mb();
282 atomic_set(&tsc_count_start, 0);
283 wmb();
284 atomic_inc(&tsc_count_stop);
285 }
287 sum = 0;
288 for (i = 0; i < NR_CPUS; i++) {
289 if (cpu_isset(i, cpu_callout_map)) {
290 t0 = tsc_values[i];
291 sum += t0;
292 }
293 }
294 avg = sum;
295 do_div(avg, num_booting_cpus());
297 sum = 0;
298 for (i = 0; i < NR_CPUS; i++) {
299 if (!cpu_isset(i, cpu_callout_map))
300 continue;
301 delta = tsc_values[i] - avg;
302 if (delta < 0)
303 delta = -delta;
304 /*
305 * We report bigger than 2 microseconds clock differences.
306 */
307 if (delta > 2*one_usec) {
308 long realdelta;
309 if (!buggy) {
310 buggy = 1;
311 printk("\n");
312 }
313 realdelta = delta;
314 do_div(realdelta, one_usec);
315 if (tsc_values[i] < avg)
316 realdelta = -realdelta;
318 printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
319 }
321 sum += delta;
322 }
323 if (!buggy)
324 printk("passed.\n");
325 }
327 static void __init synchronize_tsc_ap (void)
328 {
329 int i;
331 /*
332 * Not every cpu is online at the time
333 * this gets called, so we first wait for the BP to
334 * finish SMP initialization:
335 */
336 while (!atomic_read(&tsc_start_flag)) mb();
338 for (i = 0; i < NR_LOOPS; i++) {
339 atomic_inc(&tsc_count_start);
340 while (atomic_read(&tsc_count_start) != num_booting_cpus())
341 mb();
343 rdtscll(tsc_values[smp_processor_id()]);
344 if (i == NR_LOOPS-1)
345 write_tsc(0, 0);
347 atomic_inc(&tsc_count_stop);
348 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
349 }
350 }
351 #undef NR_LOOPS
352 #endif
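/*
 * A minimal sketch of the rendezvous pattern used by synchronize_tsc_bp()
 * and synchronize_tsc_ap() above: each participating CPU increments a
 * shared counter and spins until every CPU has arrived, so that the
 * subsequent TSC read/reset happens at roughly the same instant everywhere.
 * The names below (tsc_rendezvous_count, tsc_rendezvous) are illustrative
 * only and do not appear in this file; <asm/atomic.h> and <asm/system.h>
 * are assumed for atomic_t and mb().
 */
static atomic_t tsc_rendezvous_count = ATOMIC_INIT(0);

static void tsc_rendezvous(int nr_cpus)
{
	/* Announce arrival, then wait for everyone else. */
	atomic_inc(&tsc_rendezvous_count);
	while (atomic_read(&tsc_rendezvous_count) < nr_cpus)
		mb();
}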
354 extern void calibrate_delay(void);
356 static atomic_t init_deasserted;
358 static void __init smp_callin(void)
359 {
360 int cpuid, phys_id;
361 unsigned long timeout;
363 #if 0
364 /*
365 * If waken up by an INIT in an 82489DX configuration
366 * we may get here before an INIT-deassert IPI reaches
367 * our local APIC. We have to wait for the IPI or we'll
368 * lock up on an APIC access.
369 */
370 wait_for_init_deassert(&init_deasserted);
371 #endif
373 /*
374 * (This works even if the APIC is not enabled.)
375 */
376 phys_id = smp_processor_id();
377 cpuid = smp_processor_id();
378 if (cpu_isset(cpuid, cpu_callin_map)) {
379 printk("huh, phys CPU#%d, CPU#%d already present??\n",
380 phys_id, cpuid);
381 BUG();
382 }
383 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
385 /*
386 * STARTUP IPIs are fragile beasts as they might sometimes
387 * trigger some glue motherboard logic. Complete APIC bus
388 * silence for 1 second, this overestimates the time the
389 * boot CPU is spending to send the up to 2 STARTUP IPIs
390 * by a factor of two. This should be enough.
391 */
393 /*
394 * Waiting 2s total for startup (udelay is not yet working)
395 */
396 timeout = jiffies + 2*HZ;
397 while (time_before(jiffies, timeout)) {
398 /*
399 * Has the boot CPU finished its STARTUP sequence?
400 */
401 if (cpu_isset(cpuid, cpu_callout_map))
402 break;
403 rep_nop();
404 }
406 if (!time_before(jiffies, timeout)) {
407 printk("BUG: CPU%d started up but did not get a callout!\n",
408 cpuid);
409 BUG();
410 }
412 #if 0
413 /*
414 * the boot CPU has finished the init stage and is spinning
415 * on callin_map until we finish. We are free to set up this
416 * CPU, first the APIC. (this is probably redundant on most
417 * boards)
418 */
420 Dprintk("CALLIN, before setup_local_APIC().\n");
421 smp_callin_clear_local_apic();
422 setup_local_APIC();
423 #endif
424 map_cpu_to_logical_apicid();
426 /*
427 * Get our bogomips.
428 */
429 calibrate_delay();
430 Dprintk("Stack at about %p\n",&cpuid);
432 /*
433 * Save our processor parameters
434 */
435 smp_store_cpu_info(cpuid);
437 #if 0
438 disable_APIC_timer();
439 #endif
441 /*
442 * Allow the master to continue.
443 */
444 cpu_set(cpuid, cpu_callin_map);
446 #if 0
447 /*
448 * Synchronize the TSC with the BP
449 */
450 if (cpu_has_tsc && cpu_khz)
451 synchronize_tsc_ap();
452 #endif
453 }
455 static int cpucount;
458 static irqreturn_t ldebug_interrupt(
459 int irq, void *dev_id, struct pt_regs *regs)
460 {
461 return IRQ_HANDLED;
462 }
464 static DEFINE_PER_CPU(int, ldebug_irq);
465 static char ldebug_name[NR_CPUS][15];
467 void ldebug_setup(void)
468 {
469 int cpu = smp_processor_id();
471 per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
472 sprintf(ldebug_name[cpu], "ldebug%d", cpu);
473 BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
474 SA_INTERRUPT, ldebug_name[cpu], NULL));
475 }
478 extern void local_setup_timer(void);
480 /*
481 * Activate a secondary processor.
482 */
483 static void __init start_secondary(void *unused)
484 {
485 /*
486 * Dont put anything before smp_callin(), SMP
487 * booting is too fragile that we want to limit the
488 * things done here to the most necessary things.
489 */
490 cpu_init();
491 smp_callin();
492 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
493 rep_nop();
494 local_setup_timer();
495 ldebug_setup();
496 smp_intr_init();
497 local_irq_enable();
498 /*
499 * low-memory mappings have been cleared, flush them from
500 * the local TLBs too.
501 */
502 local_flush_tlb();
503 cpu_set(smp_processor_id(), cpu_online_map);
505 /* We can take interrupts now: we're officially "up". */
506 local_irq_enable();
508 wmb();
509 cpu_idle();
510 }
512 /*
513 * Everything has been set up for the secondary
514 * CPUs - they just need to reload everything
515 * from the task structure
516 * This function must not return.
517 */
518 void __init initialize_secondary(void)
519 {
520 /*
521 * We don't actually need to load the full TSS,
522 * basically just the stack pointer and the eip.
523 */
525 asm volatile(
526 "movl %0,%%esp\n\t"
527 "jmp *%1"
528 :
529 :"r" (current->thread.esp),"r" (current->thread.eip));
530 }
532 extern struct {
533 void * esp;
534 unsigned short ss;
535 } stack_start;
537 #ifdef CONFIG_NUMA
539 /* which logical CPUs are on which nodes */
540 cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
541 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
542 /* which node each logical CPU is on */
543 int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
544 EXPORT_SYMBOL(cpu_2_node);
546 /* set up a mapping between cpu and node. */
547 static inline void map_cpu_to_node(int cpu, int node)
548 {
549 printk("Mapping cpu %d to node %d\n", cpu, node);
550 cpu_set(cpu, node_2_cpu_mask[node]);
551 cpu_2_node[cpu] = node;
552 }
554 /* undo a mapping between cpu and node. */
555 static inline void unmap_cpu_to_node(int cpu)
556 {
557 int node;
559 printk("Unmapping cpu %d from all nodes\n", cpu);
560 for (node = 0; node < MAX_NUMNODES; node ++)
561 cpu_clear(cpu, node_2_cpu_mask[node]);
562 cpu_2_node[cpu] = 0;
563 }
564 #else /* !CONFIG_NUMA */
566 #define map_cpu_to_node(cpu, node) ({})
567 #define unmap_cpu_to_node(cpu) ({})
569 #endif /* CONFIG_NUMA */
571 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
573 static void map_cpu_to_logical_apicid(void)
574 {
575 int cpu = smp_processor_id();
576 int apicid = smp_processor_id();
578 cpu_2_logical_apicid[cpu] = apicid;
579 map_cpu_to_node(cpu, apicid_to_node(apicid));
580 }
582 static void unmap_cpu_to_logical_apicid(int cpu)
583 {
584 cpu_2_logical_apicid[cpu] = BAD_APICID;
585 unmap_cpu_to_node(cpu);
586 }
588 #if APIC_DEBUG
589 static inline void __inquire_remote_apic(int apicid)
590 {
591 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
592 char *names[] = { "ID", "VERSION", "SPIV" };
593 int timeout, status;
595 printk("Inquiring remote APIC #%d...\n", apicid);
597 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
598 printk("... APIC #%d %s: ", apicid, names[i]);
600 /*
601 * Wait for idle.
602 */
603 apic_wait_icr_idle();
605 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
606 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
608 timeout = 0;
609 do {
610 udelay(100);
611 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
612 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
614 switch (status) {
615 case APIC_ICR_RR_VALID:
616 status = apic_read(APIC_RRR);
617 printk("%08x\n", status);
618 break;
619 default:
620 printk("failed\n");
621 }
622 }
623 }
624 #endif
626 #if 0
627 #ifdef WAKE_SECONDARY_VIA_NMI
628 /*
629 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
630 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
631 * won't ... remember to clear down the APIC, etc later.
632 */
633 static int __init
634 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
635 {
636 unsigned long send_status = 0, accept_status = 0;
637 int timeout, maxlvt;
639 /* Target chip */
640 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
642 /* Boot on the stack */
643 /* Kick the second */
644 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
646 Dprintk("Waiting for send to finish...\n");
647 timeout = 0;
648 do {
649 Dprintk("+");
650 udelay(100);
651 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
652 } while (send_status && (timeout++ < 1000));
654 /*
655 * Give the other CPU some time to accept the IPI.
656 */
657 udelay(200);
658 /*
659 * Due to the Pentium erratum 3AP.
660 */
661 maxlvt = get_maxlvt();
662 if (maxlvt > 3) {
663 apic_read_around(APIC_SPIV);
664 apic_write(APIC_ESR, 0);
665 }
666 accept_status = (apic_read(APIC_ESR) & 0xEF);
667 Dprintk("NMI sent.\n");
669 if (send_status)
670 printk("APIC never delivered???\n");
671 if (accept_status)
672 printk("APIC delivery error (%lx).\n", accept_status);
674 return (send_status | accept_status);
675 }
676 #endif /* WAKE_SECONDARY_VIA_NMI */
678 #ifdef WAKE_SECONDARY_VIA_INIT
679 static int __init
680 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
681 {
682 unsigned long send_status = 0, accept_status = 0;
683 int maxlvt, timeout, num_starts, j;
685 /*
686 * Be paranoid about clearing APIC errors.
687 */
688 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
689 apic_read_around(APIC_SPIV);
690 apic_write(APIC_ESR, 0);
691 apic_read(APIC_ESR);
692 }
694 Dprintk("Asserting INIT.\n");
696 /*
697 * Turn INIT on target chip
698 */
699 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
701 /*
702 * Send IPI
703 */
704 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
705 | APIC_DM_INIT);
707 Dprintk("Waiting for send to finish...\n");
708 timeout = 0;
709 do {
710 Dprintk("+");
711 udelay(100);
712 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
713 } while (send_status && (timeout++ < 1000));
715 mdelay(10);
717 Dprintk("Deasserting INIT.\n");
719 /* Target chip */
720 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
722 /* Send IPI */
723 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
725 Dprintk("Waiting for send to finish...\n");
726 timeout = 0;
727 do {
728 Dprintk("+");
729 udelay(100);
730 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
731 } while (send_status && (timeout++ < 1000));
733 atomic_set(&init_deasserted, 1);
735 /*
736 * Should we send STARTUP IPIs ?
737 *
738 * Determine this based on the APIC version.
739 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
740 */
741 if (APIC_INTEGRATED(apic_version[phys_apicid]))
742 num_starts = 2;
743 else
744 num_starts = 0;
746 /*
747 * Run STARTUP IPI loop.
748 */
749 Dprintk("#startup loops: %d.\n", num_starts);
751 maxlvt = get_maxlvt();
753 for (j = 1; j <= num_starts; j++) {
754 Dprintk("Sending STARTUP #%d.\n",j);
755 apic_read_around(APIC_SPIV);
756 apic_write(APIC_ESR, 0);
757 apic_read(APIC_ESR);
758 Dprintk("After apic_write.\n");
760 /*
761 * STARTUP IPI
762 */
764 /* Target chip */
765 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
767 /* Boot on the stack */
768 /* Kick the second */
769 apic_write_around(APIC_ICR, APIC_DM_STARTUP
770 | (start_eip >> 12));
772 /*
773 * Give the other CPU some time to accept the IPI.
774 */
775 udelay(300);
777 Dprintk("Startup point 1.\n");
779 Dprintk("Waiting for send to finish...\n");
780 timeout = 0;
781 do {
782 Dprintk("+");
783 udelay(100);
784 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
785 } while (send_status && (timeout++ < 1000));
787 /*
788 * Give the other CPU some time to accept the IPI.
789 */
790 udelay(200);
791 /*
792 * Due to the Pentium erratum 3AP.
793 */
794 if (maxlvt > 3) {
795 apic_read_around(APIC_SPIV);
796 apic_write(APIC_ESR, 0);
797 }
798 accept_status = (apic_read(APIC_ESR) & 0xEF);
799 if (send_status || accept_status)
800 break;
801 }
802 Dprintk("After Startup.\n");
804 if (send_status)
805 printk("APIC never delivered???\n");
806 if (accept_status)
807 printk("APIC delivery error (%lx).\n", accept_status);
809 return (send_status | accept_status);
810 }
811 #endif /* WAKE_SECONDARY_VIA_INIT */
812 #endif
814 extern cpumask_t cpu_initialized;
816 static int __init do_boot_cpu(int apicid)
817 /*
818 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
819 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
820 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
821 */
822 {
823 struct task_struct *idle;
824 unsigned long boot_error;
825 int timeout, cpu;
826 unsigned long start_eip;
827 #if 0
828 unsigned short nmi_high = 0, nmi_low = 0;
829 #endif
830 vcpu_guest_context_t ctxt;
831 extern void startup_32_smp(void);
832 extern void hypervisor_callback(void);
833 extern void failsafe_callback(void);
834 extern void smp_trap_init(trap_info_t *);
835 int i;
837 cpu = ++cpucount;
838 /*
839 * We can't use kernel_thread since we must avoid to
840 * reschedule the child.
841 */
842 idle = fork_idle(cpu);
843 if (IS_ERR(idle))
844 panic("failed fork for CPU %d", cpu);
845 idle->thread.eip = (unsigned long) start_secondary;
846 /* start_eip had better be page-aligned! */
847 start_eip = (unsigned long)startup_32_smp;
849 /* So we see what's up */
850 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
851 /* Stack for startup_32 can be just as for start_secondary onwards */
852 stack_start.esp = (void *) idle->thread.esp;
854 irq_ctx_init(cpu);
856 /*
857 * This grunge runs the startup process for
858 * the targeted processor.
859 */
861 atomic_set(&init_deasserted, 0);
863 #if 1
864 if (cpu_gdt_descr[0].size > PAGE_SIZE)
865 BUG();
866 cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
867 printk("GDT: copying %d bytes from %lx to %lx\n",
868 cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
869 cpu_gdt_descr[cpu].address);
870 memcpy((void *)cpu_gdt_descr[cpu].address,
871 (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
873 memset(&ctxt, 0, sizeof(ctxt));
875 ctxt.user_regs.ds = __USER_DS;
876 ctxt.user_regs.es = __USER_DS;
877 ctxt.user_regs.fs = 0;
878 ctxt.user_regs.gs = 0;
879 ctxt.user_regs.ss = __KERNEL_DS;
880 ctxt.user_regs.cs = __KERNEL_CS;
881 ctxt.user_regs.eip = start_eip;
882 ctxt.user_regs.esp = idle->thread.esp;
883 #define X86_EFLAGS_IOPL_RING1 0x1000
884 ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
886 /* FPU is set up to default initial state. */
887 memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
889 /* Virtual IDT is empty at start-of-day. */
890 for ( i = 0; i < 256; i++ )
891 {
892 ctxt.trap_ctxt[i].vector = i;
893 ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
894 }
895 smp_trap_init(ctxt.trap_ctxt);
897 /* No LDT. */
898 ctxt.ldt_ents = 0;
900 {
901 unsigned long va;
902 int f;
904 for (va = cpu_gdt_descr[cpu].address, f = 0;
905 va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
906 va += PAGE_SIZE, f++) {
907 ctxt.gdt_frames[f] = virt_to_mfn(va);
908 make_page_readonly((void *)va);
909 }
910 ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
911 }
913 /* Ring 1 stack is the initial stack. */
914 ctxt.kernel_ss = __KERNEL_DS;
915 ctxt.kernel_sp = idle->thread.esp;
917 /* Callback handlers. */
918 ctxt.event_callback_cs = __KERNEL_CS;
919 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
920 ctxt.failsafe_callback_cs = __KERNEL_CS;
921 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
923 ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
925 boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
926 if (boot_error)
927 printk("boot error: %ld\n", boot_error);
929 if (!boot_error) {
930 /*
931 * allow APs to start initializing.
932 */
933 Dprintk("Before Callout %d.\n", cpu);
934 cpu_set(cpu, cpu_callout_map);
935 Dprintk("After Callout %d.\n", cpu);
937 /*
938 * Wait 5s total for a response
939 */
940 for (timeout = 0; timeout < 50000; timeout++) {
941 if (cpu_isset(cpu, cpu_callin_map))
942 break; /* It has booted */
943 udelay(100);
944 }
946 if (cpu_isset(cpu, cpu_callin_map)) {
947 /* number CPUs logically, starting from 1 (BSP is 0) */
948 Dprintk("OK.\n");
949 printk("CPU%d: ", cpu);
950 print_cpu_info(&cpu_data[cpu]);
951 Dprintk("CPU has booted.\n");
952 } else {
953 boot_error= 1;
954 }
955 }
956 x86_cpu_to_apicid[cpu] = apicid;
957 if (boot_error) {
958 /* Try to put things back the way they were before ... */
959 unmap_cpu_to_logical_apicid(cpu);
960 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
961 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
962 cpucount--;
963 }
965 #else
966 Dprintk("Setting warm reset code and vector.\n");
968 store_NMI_vector(&nmi_high, &nmi_low);
970 smpboot_setup_warm_reset_vector(start_eip);
972 /*
973 * Starting actual IPI sequence...
974 */
975 boot_error = wakeup_secondary_cpu(apicid, start_eip);
977 if (!boot_error) {
978 /*
979 * allow APs to start initializing.
980 */
981 Dprintk("Before Callout %d.\n", cpu);
982 cpu_set(cpu, cpu_callout_map);
983 Dprintk("After Callout %d.\n", cpu);
985 /*
986 * Wait 5s total for a response
987 */
988 for (timeout = 0; timeout < 50000; timeout++) {
989 if (cpu_isset(cpu, cpu_callin_map))
990 break; /* It has booted */
991 udelay(100);
992 }
994 if (cpu_isset(cpu, cpu_callin_map)) {
995 /* number CPUs logically, starting from 1 (BSP is 0) */
996 Dprintk("OK.\n");
997 printk("CPU%d: ", cpu);
998 print_cpu_info(&cpu_data[cpu]);
999 Dprintk("CPU has booted.\n");
1000 } else {
1001 boot_error= 1;
1002 if (*((volatile unsigned char *)trampoline_base)
1003 == 0xA5)
1004 /* trampoline started but...? */
1005 printk("Stuck ??\n");
1006 else
1007 /* trampoline code not run */
1008 printk("Not responding.\n");
1009 inquire_remote_apic(apicid);
1010 }
1011 }
1012 x86_cpu_to_apicid[cpu] = apicid;
1013 if (boot_error) {
1014 /* Try to put things back the way they were before ... */
1015 unmap_cpu_to_logical_apicid(cpu);
1016 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
1017 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
1018 cpucount--;
1019 }
1021 /* mark "stuck" area as not stuck */
1022 *((volatile unsigned long *)trampoline_base) = 0;
1023 #endif
1025 return boot_error;
1026 }
1028 static void smp_tune_scheduling (void)
1029 {
1030 unsigned long cachesize; /* kB */
1031 unsigned long bandwidth = 350; /* MB/s */
1032 /*
1033 * Rough estimation for SMP scheduling, this is the number of
1034 * cycles it takes for a fully memory-limited process to flush
1035 * the SMP-local cache.
1037 * (For a P5 this pretty much means we will choose another idle
1038 * CPU almost always at wakeup time (this is due to the small
1039 * L1 cache), on PIIs it's around 50-100 usecs, depending on
1040 * the cache size)
1041 */
1043 if (!cpu_khz) {
1044 /*
1045 * this basically disables processor-affinity
1046 * scheduling on SMP without a TSC.
1047 */
1048 return;
1049 } else {
1050 cachesize = boot_cpu_data.x86_cache_size;
1051 if (cachesize == -1) {
1052 cachesize = 16; /* Pentiums, 2x8kB cache */
1053 bandwidth = 100;
1054 }
1055 }
1056 }
1058 /*
1059 * Cycle through the processors sending APIC IPIs to boot each.
1060 */
1062 #if 0
1063 static int boot_cpu_logical_apicid;
1064 #endif
1065 /* Where the IO area was mapped on multiquad, always 0 otherwise */
1066 void *xquad_portio;
1068 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
1069 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
1070 EXPORT_SYMBOL(cpu_core_map);
1072 static void __init smp_boot_cpus(unsigned int max_cpus)
1073 {
1074 int cpu, kicked;
1075 unsigned long bogosum = 0;
1076 #if 0
1077 int apicid, bit;
1078 #endif
1080 /*
1081 * Setup boot CPU information
1082 */
1083 smp_store_cpu_info(0); /* Final full version of the data */
1084 printk("CPU%d: ", 0);
1085 print_cpu_info(&cpu_data[0]);
1087 #if 0
1088 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
1089 boot_cpu_logical_apicid = logical_smp_processor_id();
1090 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
1091 #else
1092 // boot_cpu_physical_apicid = 0;
1093 // boot_cpu_logical_apicid = 0;
1094 x86_cpu_to_apicid[0] = 0;
1095 #endif
1097 current_thread_info()->cpu = 0;
1098 smp_tune_scheduling();
1099 cpus_clear(cpu_sibling_map[0]);
1100 cpu_set(0, cpu_sibling_map[0]);
1102 cpus_clear(cpu_core_map[0]);
1103 cpu_set(0, cpu_core_map[0]);
1105 #ifdef CONFIG_X86_IO_APIC
1106 /*
1107 * If we couldn't find an SMP configuration at boot time,
1108 * get out of here now!
1109 */
1110 if (!smp_found_config && !acpi_lapic) {
1111 printk(KERN_NOTICE "SMP motherboard not detected.\n");
1112 smpboot_clear_io_apic_irqs();
1113 #if 0
1114 phys_cpu_present_map = physid_mask_of_physid(0);
1115 #endif
1116 #ifdef CONFIG_X86_LOCAL_APIC
1117 if (APIC_init_uniprocessor())
1118 printk(KERN_NOTICE "Local APIC not detected."
1119 " Using dummy APIC emulation.\n");
1120 #endif
1121 map_cpu_to_logical_apicid();
1122 cpu_set(0, cpu_sibling_map[0]);
1123 cpu_set(0, cpu_core_map[0]);
1124 return;
1125 }
1126 #endif
1128 #if 0
1129 /*
1130 * Should not be necessary because the MP table should list the boot
1131 * CPU too, but we do it for the sake of robustness anyway.
1132 * Makes no sense to do this check in clustered apic mode, so skip it
1133 */
1134 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
1135 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
1136 boot_cpu_physical_apicid);
1137 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1138 }
1140 /*
1141 * If we couldn't find a local APIC, then get out of here now!
1142 */
1143 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
1144 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1145 boot_cpu_physical_apicid);
1146 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
1147 smpboot_clear_io_apic_irqs();
1148 phys_cpu_present_map = physid_mask_of_physid(0);
1149 cpu_set(0, cpu_sibling_map[0]);
1150 cpu_set(0, cpu_core_map[0]);
1151 cpu_set(0, cpu_sibling_map[0]);
1152 cpu_set(0, cpu_core_map[0]);
1153 return;
1154 }
1156 verify_local_APIC();
1157 #endif
1159 /*
1160 * If SMP should be disabled, then really disable it!
1161 */
1162 if (!max_cpus) {
1163 HYPERVISOR_shared_info->n_vcpu = 1;
1164 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
1165 smpboot_clear_io_apic_irqs();
1166 #if 0
1167 phys_cpu_present_map = physid_mask_of_physid(0);
1168 #endif
1169 return;
1170 }
1172 smp_intr_init();
1174 #if 0
1175 connect_bsp_APIC();
1176 setup_local_APIC();
1177 #endif
1178 map_cpu_to_logical_apicid();
1179 #if 0
1182 setup_portio_remap();
1184 /*
1185 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
1187 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
1188 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
1189 * clustered apic ID.
1190 */
1191 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
1192 #endif
1193 Dprintk("CPU present map: %lx\n",
1194 (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
1196 kicked = 1;
1197 for (cpu = 1; kicked < NR_CPUS &&
1198 cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
1199 if (max_cpus <= cpucount+1)
1200 continue;
1202 #ifdef CONFIG_SMP_ALTERNATIVES
1203 if (kicked == 1)
1204 prepare_for_smp();
1205 #endif
1206 if (do_boot_cpu(cpu))
1207 printk("CPU #%d not responding - cannot use it.\n",
1208 cpu);
1209 else
1210 ++kicked;
1211 }
1213 #if 0
1214 /*
1215 * Cleanup possible dangling ends...
1216 */
1217 smpboot_restore_warm_reset_vector();
1218 #endif
1220 /*
1221 * Allow the user to impress friends.
1222 */
1223 Dprintk("Before bogomips.\n");
1224 for (cpu = 0; cpu < NR_CPUS; cpu++)
1225 if (cpu_isset(cpu, cpu_callout_map))
1226 bogosum += cpu_data[cpu].loops_per_jiffy;
1227 printk(KERN_INFO
1228 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1229 cpucount+1,
1230 bogosum/(500000/HZ),
1231 (bogosum/(5000/HZ))%100);
1233 Dprintk("Before bogocount - setting activated=1.\n");
1235 if (smp_b_stepping)
1236 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1238 /*
1239 * Don't taint if we are running SMP kernel on a single non-MP
1240 * approved Athlon
1241 */
1242 if (tainted & TAINT_UNSAFE_SMP) {
1243 if (cpucount)
1244 printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
1245 else
1246 tainted &= ~TAINT_UNSAFE_SMP;
1247 }
1249 Dprintk("Boot done.\n");
1251 /*
1252 * construct cpu_sibling_map[], so that we can tell sibling CPUs
1253 * efficiently.
1254 */
1255 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1256 cpus_clear(cpu_sibling_map[cpu]);
1257 cpus_clear(cpu_core_map[cpu]);
1258 }
1260 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1261 struct cpuinfo_x86 *c = cpu_data + cpu;
1262 int siblings = 0;
1263 int i;
1264 if (!cpu_isset(cpu, cpu_callout_map))
1265 continue;
1267 if (smp_num_siblings > 1) {
1268 for (i = 0; i < NR_CPUS; i++) {
1269 if (!cpu_isset(i, cpu_callout_map))
1270 continue;
1271 if (cpu_core_id[cpu] == cpu_core_id[i]) {
1272 siblings++;
1273 cpu_set(i, cpu_sibling_map[cpu]);
1274 }
1275 }
1276 } else {
1277 siblings++;
1278 cpu_set(cpu, cpu_sibling_map[cpu]);
1279 }
1281 if (siblings != smp_num_siblings) {
1282 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1283 smp_num_siblings = siblings;
1284 }
1285 if (c->x86_num_cores > 1) {
1286 for (i = 0; i < NR_CPUS; i++) {
1287 if (!cpu_isset(i, cpu_callout_map))
1288 continue;
1289 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1290 cpu_set(i, cpu_core_map[cpu]);
1291 }
1292 }
1293 } else {
1294 cpu_core_map[cpu] = cpu_sibling_map[cpu];
1295 }
1296 }
1298 smpboot_setup_io_apic();
1300 #if 0
1301 setup_boot_APIC_clock();
1303 /*
1304 * Synchronize the TSC with the AP
1305 */
1306 if (cpu_has_tsc && cpucount && cpu_khz)
1307 synchronize_tsc_bp();
1308 #endif
1309 }
1311 /* These are wrappers to interface to the new boot process. Someone
1312 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1313 void __init smp_prepare_cpus(unsigned int max_cpus)
1314 {
1315 smp_commenced_mask = cpumask_of_cpu(0);
1316 cpu_callin_map = cpumask_of_cpu(0);
1317 mb();
1318 smp_boot_cpus(max_cpus);
1319 }
1321 void __devinit smp_prepare_boot_cpu(void)
1322 {
1323 cpu_set(smp_processor_id(), cpu_online_map);
1324 cpu_set(smp_processor_id(), cpu_callout_map);
1325 }
1327 #ifdef CONFIG_HOTPLUG_CPU
1328 #include <asm-xen/xenbus.h>
1329 /* hotplug down/up function pointer and target vcpu */
1330 struct vcpu_hotplug_handler_t {
1331 void (*fn) (int vcpu);
1332 u32 vcpu;
1333 };
1334 static struct vcpu_hotplug_handler_t vcpu_hotplug_handler;
1336 static int vcpu_hotplug_cpu_process(void *unused)
1337 {
1338 struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
1340 if (handler->fn) {
1341 (*(handler->fn)) (handler->vcpu);
1342 handler->fn = NULL;
1343 }
1344 return 0;
1345 }
1347 static void __vcpu_hotplug_handler(void *unused)
1348 {
1349 int err;
1351 err = kernel_thread(vcpu_hotplug_cpu_process,
1352 NULL, CLONE_FS | CLONE_FILES);
1353 if (err < 0)
1354 printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
1355 }
1357 static void handle_vcpu_hotplug_event(struct xenbus_watch *, const char *);
1358 static struct notifier_block xsn_cpu;
1360 /* xenbus watch struct */
1361 static struct xenbus_watch cpu_watch = {
1362 .node = "cpu",
1363 .callback = handle_vcpu_hotplug_event
1364 };
1366 /* NB: Assumes xenbus_lock is held! */
1367 static int setup_cpu_watcher(struct notifier_block *notifier,
1368 unsigned long event, void *data)
1369 {
1370 int err = 0;
1372 BUG_ON(down_trylock(&xenbus_lock) == 0);
1373 err = register_xenbus_watch(&cpu_watch);
1375 if (err) {
1376 printk("Failed to register watch on /cpu\n");
1379 return NOTIFY_DONE;
1382 static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, const char *node)
1384 static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
1385 struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
1386 ssize_t ret;
1387 int err, cpu;
1388 char state[8];
1389 char dir[32];
1390 char *cpustr;
1392 /* get a pointer to start of cpu string */
1393 if ((cpustr = strstr(node, "cpu/")) != NULL) {
1395 /* find which cpu state changed, note vcpu for handler */
1396 sscanf(cpustr, "cpu/%d", &cpu);
1397 handler->vcpu = cpu;
1399 /* calc the dir for xenbus read */
1400 sprintf(dir, "cpu/%d", cpu);
1402 /* make sure watch that was triggered is changes to the correct key */
1403 if ((strcmp(node + strlen(dir), "/availability")) != 0)
1404 return;
1406 /* get the state value */
1407 xenbus_transaction_start("cpu");
1408 err = xenbus_scanf(dir, "availability", "%s", state);
1409 xenbus_transaction_end(0);
1411 if (err != 1) {
1412 printk(KERN_ERR
1413 "XENBUS: Unable to read cpu state\n");
1414 return;
1415 }
1417 /* if we detect a state change, take action */
1418 if (strcmp(state, "online") == 0) {
1419 /* offline -> online */
1420 if (!cpu_isset(cpu, cpu_online_map)) {
1421 handler->fn = (void *)&cpu_up;
1422 ret = schedule_work(&vcpu_hotplug_work);
1423 }
1424 } else if (strcmp(state, "offline") == 0) {
1425 /* online -> offline */
1426 if (cpu_isset(cpu, cpu_online_map)) {
1427 handler->fn = (void *)&cpu_down;
1428 ret = schedule_work(&vcpu_hotplug_work);
1429 }
1430 } else {
1431 printk(KERN_ERR
1432 "XENBUS: unknown state(%s) on node(%s)\n", state,
1433 node);
1434 }
1435 }
1436 return;
1437 }
1439 static int __init setup_vcpu_hotplug_event(void)
1440 {
1441 xsn_cpu.notifier_call = setup_cpu_watcher;
1443 register_xenstore_notifier(&xsn_cpu);
1445 return 0;
1446 }
1448 subsys_initcall(setup_vcpu_hotplug_event);
1450 /* must be called with the cpucontrol mutex held */
1451 static int __devinit cpu_enable(unsigned int cpu)
1452 {
1453 #ifdef CONFIG_SMP_ALTERNATIVES
1454 if (num_online_cpus() == 1)
1455 prepare_for_smp();
1456 #endif
1458 /* get the target out of its holding state */
1459 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1460 wmb();
1462 /* wait for the processor to ack it. timeout? */
1463 while (!cpu_online(cpu))
1464 cpu_relax();
1466 fixup_irqs(cpu_online_map);
1468 /* counter the disable in fixup_irqs() */
1469 local_irq_enable();
1470 return 0;
1471 }
1473 int __cpu_disable(void)
1474 {
1475 cpumask_t map = cpu_online_map;
1476 int cpu = smp_processor_id();
1478 /*
1479 * Perhaps use cpufreq to drop frequency, but that could go
1480 * into generic code.
1482 * We won't take down the boot processor on i386 due to some
1483 * interrupts only being able to be serviced by the BSP.
1484 * Especially so if we're not using an IOAPIC -zwane
1485 */
1486 if (cpu == 0)
1487 return -EBUSY;
1489 cpu_clear(cpu, map);
1490 fixup_irqs(map);
1492 /* It's now safe to remove this processor from the online map */
1493 cpu_clear(cpu, cpu_online_map);
1495 #ifdef CONFIG_SMP_ALTERNATIVES
1496 if (num_online_cpus() == 1)
1497 unprepare_for_smp();
1498 #endif
1500 return 0;
1501 }
1503 void __cpu_die(unsigned int cpu)
1504 {
1505 /* We don't do anything here: idle task is faking death itself. */
1506 unsigned int i;
1508 for (i = 0; i < 10; i++) {
1509 /* They ack this in play_dead by setting CPU_DEAD */
1510 if (per_cpu(cpu_state, cpu) == CPU_DEAD)
1511 return;
1512 current->state = TASK_UNINTERRUPTIBLE;
1513 schedule_timeout(HZ/10);
1514 }
1515 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1516 }
1518 #else /* ... !CONFIG_HOTPLUG_CPU */
1519 int __cpu_disable(void)
1520 {
1521 return -ENOSYS;
1522 }
1524 void __cpu_die(unsigned int cpu)
1525 {
1526 /* We said "no" in __cpu_disable */
1527 BUG();
1528 }
1529 #endif /* CONFIG_HOTPLUG_CPU */
1531 int __devinit __cpu_up(unsigned int cpu)
1532 {
1533 /* In case one didn't come up */
1534 if (!cpu_isset(cpu, cpu_callin_map)) {
1535 printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
1536 local_irq_enable();
1537 return -EIO;
1538 }
1540 #ifdef CONFIG_HOTPLUG_CPU
1541 #ifdef CONFIG_XEN
1542 /* Tell hypervisor to bring vcpu up. */
1543 HYPERVISOR_vcpu_up(cpu);
1544 #endif
1545 /* Already up, and in cpu_quiescent now? */
1546 if (cpu_isset(cpu, smp_commenced_mask)) {
1547 cpu_enable(cpu);
1548 return 0;
1549 }
1550 #endif
1552 local_irq_enable();
1553 /* Unleash the CPU! */
1554 cpu_set(cpu, smp_commenced_mask);
1555 while (!cpu_isset(cpu, cpu_online_map))
1556 mb();
1557 return 0;
1558 }
1560 void __init smp_cpus_done(unsigned int max_cpus)
1561 {
1562 #if 1
1563 #else
1564 #ifdef CONFIG_X86_IO_APIC
1565 setup_ioapic_dest();
1566 #endif
1567 zap_low_mappings();
1568 /*
1569 * Disable executability of the SMP trampoline:
1570 */
1571 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1572 #endif
1573 }
1575 extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
1576 extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
1578 void smp_intr_init(void)
1579 {
1580 int cpu = smp_processor_id();
1582 per_cpu(resched_irq, cpu) =
1583 bind_ipi_to_irq(RESCHEDULE_VECTOR);
1584 sprintf(resched_name[cpu], "resched%d", cpu);
1585 BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
1586 SA_INTERRUPT, resched_name[cpu], NULL));
1588 per_cpu(callfunc_irq, cpu) =
1589 bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
1590 sprintf(callfunc_name[cpu], "callfunc%d", cpu);
1591 BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
1592 smp_call_function_interrupt,
1593 SA_INTERRUPT, callfunc_name[cpu], NULL));
1594 }
1596 static void smp_intr_exit(void)
1597 {
1598 int cpu = smp_processor_id();
1600 free_irq(per_cpu(resched_irq, cpu), NULL);
1601 unbind_ipi_from_irq(RESCHEDULE_VECTOR);
1603 free_irq(per_cpu(callfunc_irq, cpu), NULL);
1604 unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
1605 }
1607 void smp_suspend(void)
1608 {
1609 /* XXX todo: take down time and ipi's on all cpus */
1610 local_teardown_timer_irq();
1611 smp_intr_exit();
1612 }
1614 void smp_resume(void)
1615 {
1616 /* XXX todo: restore time and ipi's on all cpus */
1617 smp_intr_init();
1618 local_setup_timer_irq();
1619 }
1621 DECLARE_PER_CPU(int, timer_irq);
1623 void _restore_vcpu(void)
1624 {
1625 int cpu = smp_processor_id();
1626 extern atomic_t vcpus_rebooting;
1628 /* We are the first thing the vcpu runs when it comes back,
1629 and we are supposed to restore the IPIs and timer
1630 interrupts etc. When we return, the vcpu's idle loop will
1631 start up again. */
1632 _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
1633 _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
1634 _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
1635 _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
1636 atomic_dec(&vcpus_rebooting);
1637 }