ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c @ 6432:b54144915ae6

merge?
author cl349@firebug.cl.cam.ac.uk
date Thu Aug 25 16:26:30 2005 +0000 (2005-08-25)
parents 3428d58a85e1 8d31f9a9c423
children 0610add7c3fe
line source
1 /*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Martin J. Bligh : Added support for multi-quad systems
33 * Dave Jones : Report invalid combinations of Athlon CPUs.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process. */
36 #include <linux/module.h>
37 #include <linux/config.h>
38 #include <linux/init.h>
39 #include <linux/kernel.h>
41 #include <linux/mm.h>
42 #include <linux/sched.h>
43 #include <linux/kernel_stat.h>
44 #include <linux/smp_lock.h>
45 #include <linux/irq.h>
46 #include <linux/bootmem.h>
47 #include <linux/notifier.h>
48 #include <linux/cpu.h>
49 #include <linux/percpu.h>
51 #include <linux/delay.h>
52 #include <linux/mc146818rtc.h>
53 #include <asm/tlbflush.h>
54 #include <asm/desc.h>
55 #include <asm/arch_hooks.h>
57 #include <asm/smp_alt.h>
59 #ifndef CONFIG_X86_IO_APIC
60 #define Dprintk(args...)
61 #endif
62 #include <mach_wakecpu.h>
63 #include <smpboot_hooks.h>
65 #include <asm-xen/evtchn.h>
67 /* Set if we find a B stepping CPU */
68 static int __initdata smp_b_stepping;
70 /* Number of siblings per CPU package */
71 int smp_num_siblings = 1;
72 int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
73 EXPORT_SYMBOL(phys_proc_id);
74 int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
75 EXPORT_SYMBOL(cpu_core_id);
77 /* bitmap of online cpus */
78 cpumask_t cpu_online_map;
80 cpumask_t cpu_callin_map;
81 cpumask_t cpu_callout_map;
82 static cpumask_t smp_commenced_mask;
84 /* Per CPU bogomips and other parameters */
85 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
87 u8 x86_cpu_to_apicid[NR_CPUS] =
88 { [0 ... NR_CPUS-1] = 0xff };
89 EXPORT_SYMBOL(x86_cpu_to_apicid);
91 #if 0
92 /*
93 * Trampoline 80x86 program as an array.
94 */
96 extern unsigned char trampoline_data [];
97 extern unsigned char trampoline_end [];
98 static unsigned char *trampoline_base;
99 static int trampoline_exec;
100 #endif
102 #ifdef CONFIG_HOTPLUG_CPU
103 /* State of each CPU. */
104 DEFINE_PER_CPU(int, cpu_state) = { 0 };
105 #endif
107 static DEFINE_PER_CPU(int, resched_irq);
108 static DEFINE_PER_CPU(int, callfunc_irq);
109 static char resched_name[NR_CPUS][15];
110 static char callfunc_name[NR_CPUS][15];
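/*
 * NB: Xen guests have no local APIC, so cross-CPU "IPIs" are delivered
 * over event channels. smp_intr_init() below binds one reschedule and
 * one call-function channel on each CPU; the per-CPU variables above
 * hold the resulting dynamic IRQ numbers and their interrupt names.
 */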
112 #if 0
113 /*
114 * Currently trivial. Write the real->protected mode
115 * bootstrap into the page concerned. The caller
116 * has made sure it's suitably aligned.
117 */
119 static unsigned long __init setup_trampoline(void)
120 {
121 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
122 return virt_to_phys(trampoline_base);
123 }
124 #endif
126 static void map_cpu_to_logical_apicid(void);
128 /*
129 * We are called very early to get the low memory for the
130 * SMP bootup trampoline page.
131 */
132 void __init smp_alloc_memory(void)
133 {
134 #if 0
135 trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
136 /*
137 * Has to be in very low memory so we can execute
138 * real-mode AP code.
139 */
140 if (__pa(trampoline_base) >= 0x9F000)
141 BUG();
142 /*
143 * Make the SMP trampoline executable:
144 */
145 trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
146 #endif
147 }
149 /*
150 * The bootstrap kernel entry code has set these up. Save them for
151 * a given CPU
152 */
154 static void __init smp_store_cpu_info(int id)
155 {
156 struct cpuinfo_x86 *c = cpu_data + id;
158 *c = boot_cpu_data;
159 if (id!=0)
160 identify_cpu(c);
161 /*
162 * Mask B, Pentium, but not Pentium MMX
163 */
164 if (c->x86_vendor == X86_VENDOR_INTEL &&
165 c->x86 == 5 &&
166 c->x86_mask >= 1 && c->x86_mask <= 4 &&
167 c->x86_model <= 3)
168 /*
169 * Remember we have B step Pentia with bugs
170 */
171 smp_b_stepping = 1;
173 /*
174 * Certain Athlons might work (for various values of 'work') in SMP
175 * but they are not certified as MP capable.
176 */
177 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
179 /* Athlon 660/661 is valid. */
180 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
181 goto valid_k7;
183 /* Duron 670 is valid */
184 if ((c->x86_model==7) && (c->x86_mask==0))
185 goto valid_k7;
187 /*
188 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
189 * It's worth noting that the A5 stepping (662) of some Athlon XP's
190 * have the MP bit set.
191 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
192 */
193 if (((c->x86_model==6) && (c->x86_mask>=2)) ||
194 ((c->x86_model==7) && (c->x86_mask>=1)) ||
195 (c->x86_model> 7))
196 if (cpu_has_mp)
197 goto valid_k7;
199 /* If we get here, it's not a certified SMP capable AMD system. */
200 tainted |= TAINT_UNSAFE_SMP;
201 }
203 valid_k7:
204 ;
205 }
207 #if 0
208 /*
209 * TSC synchronization.
210 *
211 * We first check whether all CPUs have their TSC's synchronized,
212 * then we print a warning if not, and always resync.
213 */
215 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
216 static atomic_t tsc_count_start = ATOMIC_INIT(0);
217 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
218 static unsigned long long tsc_values[NR_CPUS];
220 #define NR_LOOPS 5
222 static void __init synchronize_tsc_bp (void)
223 {
224 int i;
225 unsigned long long t0;
226 unsigned long long sum, avg;
227 long long delta;
228 unsigned long one_usec;
229 int buggy = 0;
231 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
233 /* convert from kcyc/sec to cyc/usec */
234 one_usec = cpu_khz / 1000;
236 atomic_set(&tsc_start_flag, 1);
237 wmb();
239 /*
240 * We loop a few times to get a primed instruction cache,
241 * then the last pass is more or less synchronized and
242 * the BP and APs set their cycle counters to zero all at
243 * once. This reduces the chance of having random offsets
244 * between the processors, and guarantees that the maximum
245 * delay between the cycle counters is never bigger than
246 * the latency of information-passing (cachelines) between
247 * two CPUs.
248 */
249 for (i = 0; i < NR_LOOPS; i++) {
250 /*
251 * all APs synchronize but they loop on '== num_cpus'
252 */
253 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
254 mb();
255 atomic_set(&tsc_count_stop, 0);
256 wmb();
257 /*
258 * this lets the APs save their current TSC:
259 */
260 atomic_inc(&tsc_count_start);
262 rdtscll(tsc_values[smp_processor_id()]);
263 /*
264 * We clear the TSC in the last loop:
265 */
266 if (i == NR_LOOPS-1)
267 write_tsc(0, 0);
269 /*
270 * Wait for all APs to leave the synchronization point:
271 */
272 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
273 mb();
274 atomic_set(&tsc_count_start, 0);
275 wmb();
276 atomic_inc(&tsc_count_stop);
277 }
279 sum = 0;
280 for (i = 0; i < NR_CPUS; i++) {
281 if (cpu_isset(i, cpu_callout_map)) {
282 t0 = tsc_values[i];
283 sum += t0;
284 }
285 }
286 avg = sum;
287 do_div(avg, num_booting_cpus());
289 sum = 0;
290 for (i = 0; i < NR_CPUS; i++) {
291 if (!cpu_isset(i, cpu_callout_map))
292 continue;
293 delta = tsc_values[i] - avg;
294 if (delta < 0)
295 delta = -delta;
296 /*
297 * We report bigger than 2 microseconds clock differences.
298 */
299 if (delta > 2*one_usec) {
300 long realdelta;
301 if (!buggy) {
302 buggy = 1;
303 printk("\n");
304 }
305 realdelta = delta;
306 do_div(realdelta, one_usec);
307 if (tsc_values[i] < avg)
308 realdelta = -realdelta;
310 printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
311 }
313 sum += delta;
314 }
315 if (!buggy)
316 printk("passed.\n");
317 }
319 static void __init synchronize_tsc_ap (void)
320 {
321 int i;
323 /*
324 * Not every cpu is online at the time
325 * this gets called, so we first wait for the BP to
326 * finish SMP initialization:
327 */
328 while (!atomic_read(&tsc_start_flag)) mb();
330 for (i = 0; i < NR_LOOPS; i++) {
331 atomic_inc(&tsc_count_start);
332 while (atomic_read(&tsc_count_start) != num_booting_cpus())
333 mb();
335 rdtscll(tsc_values[smp_processor_id()]);
336 if (i == NR_LOOPS-1)
337 write_tsc(0, 0);
339 atomic_inc(&tsc_count_stop);
340 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
341 }
342 }
343 #undef NR_LOOPS
344 #endif
346 extern void calibrate_delay(void);
348 static atomic_t init_deasserted;
350 static void __init smp_callin(void)
351 {
352 int cpuid, phys_id;
353 unsigned long timeout;
355 #if 0
356 /*
357 * If woken up by an INIT in an 82489DX configuration
358 * we may get here before an INIT-deassert IPI reaches
359 * our local APIC. We have to wait for the IPI or we'll
360 * lock up on an APIC access.
361 */
362 wait_for_init_deassert(&init_deasserted);
363 #endif
365 /*
366 * (This works even if the APIC is not enabled.)
367 */
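/*
 * Under Xen no physical APIC ID is visible to the guest, so the
 * "physical" ID below is simply the virtual CPU number.
 */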
368 phys_id = smp_processor_id();
369 cpuid = smp_processor_id();
370 if (cpu_isset(cpuid, cpu_callin_map)) {
371 printk("huh, phys CPU#%d, CPU#%d already present??\n",
372 phys_id, cpuid);
373 BUG();
374 }
375 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
377 /*
378 * STARTUP IPIs are fragile beasts as they might sometimes
379 * trigger some glue motherboard logic. Complete APIC bus
380 * silence for 1 second, this overestimates the time the
381 * boot CPU is spending to send the up to 2 STARTUP IPIs
382 * by a factor of two. This should be enough.
383 */
385 /*
386 * Waiting 2s total for startup (udelay is not yet working)
387 */
388 timeout = jiffies + 2*HZ;
389 while (time_before(jiffies, timeout)) {
390 /*
391 * Has the boot CPU finished its STARTUP sequence?
392 */
393 if (cpu_isset(cpuid, cpu_callout_map))
394 break;
395 rep_nop();
396 }
398 if (!time_before(jiffies, timeout)) {
399 printk("BUG: CPU%d started up but did not get a callout!\n",
400 cpuid);
401 BUG();
402 }
404 #if 0
405 /*
406 * the boot CPU has finished the init stage and is spinning
407 * on callin_map until we finish. We are free to set up this
408 * CPU, first the APIC. (this is probably redundant on most
409 * boards)
410 */
412 Dprintk("CALLIN, before setup_local_APIC().\n");
413 smp_callin_clear_local_apic();
414 setup_local_APIC();
415 #endif
416 map_cpu_to_logical_apicid();
418 /*
419 * Get our bogomips.
420 */
421 calibrate_delay();
422 Dprintk("Stack at about %p\n",&cpuid);
424 /*
425 * Save our processor parameters
426 */
427 smp_store_cpu_info(cpuid);
429 #if 0
430 disable_APIC_timer();
431 #endif
433 /*
434 * Allow the master to continue.
435 */
436 cpu_set(cpuid, cpu_callin_map);
438 #if 0
439 /*
440 * Synchronize the TSC with the BP
441 */
442 if (cpu_has_tsc && cpu_khz)
443 synchronize_tsc_ap();
444 #endif
445 }
447 static int cpucount;
450 static irqreturn_t ldebug_interrupt(
451 int irq, void *dev_id, struct pt_regs *regs)
452 {
453 return IRQ_HANDLED;
454 }
456 static DEFINE_PER_CPU(int, ldebug_irq);
457 static char ldebug_name[NR_CPUS][15];
459 void ldebug_setup(void)
460 {
461 int cpu = smp_processor_id();
463 per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
464 sprintf(ldebug_name[cpu], "ldebug%d", cpu);
465 BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
466 SA_INTERRUPT, ldebug_name[cpu], NULL));
467 }
470 extern void local_setup_timer(void);
472 /*
473 * Activate a secondary processor.
474 */
475 static void __init start_secondary(void *unused)
476 {
477 /*
478 * Don't put anything before smp_callin(): SMP
479 * booting is so fragile that we want to limit the
480 * things done here to the most necessary things.
481 */
482 cpu_init();
483 smp_callin();
484 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
485 rep_nop();
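/*
 * Per-VCPU event channels (timer and debug VIRQs, reschedule and
 * call-function IPIs) are not inherited from the boot CPU, so each
 * secondary binds its own below before enabling interrupts.
 */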
486 local_setup_timer();
487 ldebug_setup();
488 smp_intr_init();
489 local_irq_enable();
490 /*
491 * low-memory mappings have been cleared, flush them from
492 * the local TLBs too.
493 */
494 local_flush_tlb();
495 cpu_set(smp_processor_id(), cpu_online_map);
497 /* We can take interrupts now: we're officially "up". */
498 local_irq_enable();
500 wmb();
501 cpu_idle();
502 }
504 /*
505 * Everything has been set up for the secondary
506 * CPUs - they just need to reload everything
507 * from the task structure
508 * This function must not return.
509 */
510 void __init initialize_secondary(void)
511 {
512 /*
513 * We don't actually need to load the full TSS,
514 * basically just the stack pointer and the eip.
515 */
517 asm volatile(
518 "movl %0,%%esp\n\t"
519 "jmp *%1"
520 :
521 :"r" (current->thread.esp),"r" (current->thread.eip));
522 }
524 extern struct {
525 void * esp;
526 unsigned short ss;
527 } stack_start;
529 #ifdef CONFIG_NUMA
531 /* which logical CPUs are on which nodes */
532 cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
533 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
534 /* which node each logical CPU is on */
535 int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
536 EXPORT_SYMBOL(cpu_2_node);
538 /* set up a mapping between cpu and node. */
539 static inline void map_cpu_to_node(int cpu, int node)
540 {
541 printk("Mapping cpu %d to node %d\n", cpu, node);
542 cpu_set(cpu, node_2_cpu_mask[node]);
543 cpu_2_node[cpu] = node;
544 }
546 /* undo a mapping between cpu and node. */
547 static inline void unmap_cpu_to_node(int cpu)
548 {
549 int node;
551 printk("Unmapping cpu %d from all nodes\n", cpu);
552 for (node = 0; node < MAX_NUMNODES; node ++)
553 cpu_clear(cpu, node_2_cpu_mask[node]);
554 cpu_2_node[cpu] = 0;
555 }
556 #else /* !CONFIG_NUMA */
558 #define map_cpu_to_node(cpu, node) ({})
559 #define unmap_cpu_to_node(cpu) ({})
561 #endif /* CONFIG_NUMA */
563 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
565 static void map_cpu_to_logical_apicid(void)
566 {
567 int cpu = smp_processor_id();
568 int apicid = smp_processor_id();
570 cpu_2_logical_apicid[cpu] = apicid;
571 map_cpu_to_node(cpu, apicid_to_node(apicid));
572 }
574 static void unmap_cpu_to_logical_apicid(int cpu)
575 {
576 cpu_2_logical_apicid[cpu] = BAD_APICID;
577 unmap_cpu_to_node(cpu);
578 }
580 #if APIC_DEBUG
581 static inline void __inquire_remote_apic(int apicid)
582 {
583 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
584 char *names[] = { "ID", "VERSION", "SPIV" };
585 int timeout, status;
587 printk("Inquiring remote APIC #%d...\n", apicid);
589 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
590 printk("... APIC #%d %s: ", apicid, names[i]);
592 /*
593 * Wait for idle.
594 */
595 apic_wait_icr_idle();
597 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
598 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
600 timeout = 0;
601 do {
602 udelay(100);
603 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
604 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
606 switch (status) {
607 case APIC_ICR_RR_VALID:
608 status = apic_read(APIC_RRR);
609 printk("%08x\n", status);
610 break;
611 default:
612 printk("failed\n");
613 }
614 }
615 }
616 #endif
618 #if 0
619 #ifdef WAKE_SECONDARY_VIA_NMI
620 /*
621 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
622 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
623 * won't ... remember to clear down the APIC, etc later.
624 */
625 static int __init
626 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
627 {
628 unsigned long send_status = 0, accept_status = 0;
629 int timeout, maxlvt;
631 /* Target chip */
632 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
634 /* Boot on the stack */
635 /* Kick the second */
636 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
638 Dprintk("Waiting for send to finish...\n");
639 timeout = 0;
640 do {
641 Dprintk("+");
642 udelay(100);
643 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
644 } while (send_status && (timeout++ < 1000));
646 /*
647 * Give the other CPU some time to accept the IPI.
648 */
649 udelay(200);
650 /*
651 * Due to the Pentium erratum 3AP.
652 */
653 maxlvt = get_maxlvt();
654 if (maxlvt > 3) {
655 apic_read_around(APIC_SPIV);
656 apic_write(APIC_ESR, 0);
657 }
658 accept_status = (apic_read(APIC_ESR) & 0xEF);
659 Dprintk("NMI sent.\n");
661 if (send_status)
662 printk("APIC never delivered???\n");
663 if (accept_status)
664 printk("APIC delivery error (%lx).\n", accept_status);
666 return (send_status | accept_status);
667 }
668 #endif /* WAKE_SECONDARY_VIA_NMI */
670 #ifdef WAKE_SECONDARY_VIA_INIT
671 static int __init
672 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
673 {
674 unsigned long send_status = 0, accept_status = 0;
675 int maxlvt, timeout, num_starts, j;
677 /*
678 * Be paranoid about clearing APIC errors.
679 */
680 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
681 apic_read_around(APIC_SPIV);
682 apic_write(APIC_ESR, 0);
683 apic_read(APIC_ESR);
684 }
686 Dprintk("Asserting INIT.\n");
688 /*
689 * Turn INIT on target chip
690 */
691 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
693 /*
694 * Send IPI
695 */
696 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
697 | APIC_DM_INIT);
699 Dprintk("Waiting for send to finish...\n");
700 timeout = 0;
701 do {
702 Dprintk("+");
703 udelay(100);
704 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
705 } while (send_status && (timeout++ < 1000));
707 mdelay(10);
709 Dprintk("Deasserting INIT.\n");
711 /* Target chip */
712 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
714 /* Send IPI */
715 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
717 Dprintk("Waiting for send to finish...\n");
718 timeout = 0;
719 do {
720 Dprintk("+");
721 udelay(100);
722 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
723 } while (send_status && (timeout++ < 1000));
725 atomic_set(&init_deasserted, 1);
727 /*
728 * Should we send STARTUP IPIs ?
729 *
730 * Determine this based on the APIC version.
731 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
732 */
733 if (APIC_INTEGRATED(apic_version[phys_apicid]))
734 num_starts = 2;
735 else
736 num_starts = 0;
738 /*
739 * Run STARTUP IPI loop.
740 */
741 Dprintk("#startup loops: %d.\n", num_starts);
743 maxlvt = get_maxlvt();
745 for (j = 1; j <= num_starts; j++) {
746 Dprintk("Sending STARTUP #%d.\n",j);
747 apic_read_around(APIC_SPIV);
748 apic_write(APIC_ESR, 0);
749 apic_read(APIC_ESR);
750 Dprintk("After apic_write.\n");
752 /*
753 * STARTUP IPI
754 */
756 /* Target chip */
757 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
759 /* Boot on the stack */
760 /* Kick the second */
761 apic_write_around(APIC_ICR, APIC_DM_STARTUP
762 | (start_eip >> 12));
764 /*
765 * Give the other CPU some time to accept the IPI.
766 */
767 udelay(300);
769 Dprintk("Startup point 1.\n");
771 Dprintk("Waiting for send to finish...\n");
772 timeout = 0;
773 do {
774 Dprintk("+");
775 udelay(100);
776 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
777 } while (send_status && (timeout++ < 1000));
779 /*
780 * Give the other CPU some time to accept the IPI.
781 */
782 udelay(200);
783 /*
784 * Due to the Pentium erratum 3AP.
785 */
786 if (maxlvt > 3) {
787 apic_read_around(APIC_SPIV);
788 apic_write(APIC_ESR, 0);
789 }
790 accept_status = (apic_read(APIC_ESR) & 0xEF);
791 if (send_status || accept_status)
792 break;
793 }
794 Dprintk("After Startup.\n");
796 if (send_status)
797 printk("APIC never delivered???\n");
798 if (accept_status)
799 printk("APIC delivery error (%lx).\n", accept_status);
801 return (send_status | accept_status);
802 }
803 #endif /* WAKE_SECONDARY_VIA_INIT */
804 #endif
806 extern cpumask_t cpu_initialized;
808 static int __init do_boot_cpu(int apicid)
809 /*
810 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
811 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
812 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
813 */
814 {
815 struct task_struct *idle;
816 unsigned long boot_error;
817 int timeout, cpu;
818 unsigned long start_eip;
819 #if 0
820 unsigned short nmi_high = 0, nmi_low = 0;
821 #endif
822 vcpu_guest_context_t ctxt;
823 extern void startup_32_smp(void);
824 extern void hypervisor_callback(void);
825 extern void failsafe_callback(void);
826 extern void smp_trap_init(trap_info_t *);
827 int i;
829 cpu = ++cpucount;
830 /*
831 * We can't use kernel_thread since we must avoid to
832 * reschedule the child.
833 */
834 idle = fork_idle(cpu);
835 if (IS_ERR(idle))
836 panic("failed fork for CPU %d", cpu);
837 idle->thread.eip = (unsigned long) start_secondary;
838 /* start_eip had better be page-aligned! */
839 start_eip = (unsigned long)startup_32_smp;
841 /* So we see what's up */
842 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
843 /* Stack for startup_32 can be just as for start_secondary onwards */
844 stack_start.esp = (void *) idle->thread.esp;
846 irq_ctx_init(cpu);
848 /*
849 * This grunge runs the startup process for
850 * the targeted processor.
851 */
853 atomic_set(&init_deasserted, 0);
855 #if 1
856 cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL);
857 BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
858 cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
859 printk("GDT: copying %d bytes from %lx to %lx\n",
860 cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
861 cpu_gdt_descr[cpu].address);
862 memcpy((void *)cpu_gdt_descr[cpu].address,
863 (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
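/*
 * Rather than the native INIT/STARTUP IPI sequence, the Xen port
 * describes the new CPU's complete initial state in a
 * vcpu_guest_context (registers, trap table, GDT frames, callbacks,
 * cr3) and asks the hypervisor to start the vcpu via
 * HYPERVISOR_boot_vcpu() below.
 */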
865 memset(&ctxt, 0, sizeof(ctxt));
867 ctxt.user_regs.ds = __USER_DS;
868 ctxt.user_regs.es = __USER_DS;
869 ctxt.user_regs.fs = 0;
870 ctxt.user_regs.gs = 0;
871 ctxt.user_regs.ss = __KERNEL_DS;
872 ctxt.user_regs.cs = __KERNEL_CS;
873 ctxt.user_regs.eip = start_eip;
874 ctxt.user_regs.esp = idle->thread.esp;
875 #define X86_EFLAGS_IOPL_RING1 0x1000
876 ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;
878 /* FPU is set up to default initial state. */
879 memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
881 /* Virtual IDT is empty at start-of-day. */
882 for ( i = 0; i < 256; i++ )
883 {
884 ctxt.trap_ctxt[i].vector = i;
885 ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
886 }
887 smp_trap_init(ctxt.trap_ctxt);
889 /* No LDT. */
890 ctxt.ldt_ents = 0;
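/*
 * The GDT is handed to Xen as a list of machine frame numbers, and
 * every frame backing it must be mapped read-only before the
 * hypervisor will accept it.
 */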
892 {
893 unsigned long va;
894 int f;
896 for (va = cpu_gdt_descr[cpu].address, f = 0;
897 va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
898 va += PAGE_SIZE, f++) {
899 ctxt.gdt_frames[f] = virt_to_mfn(va);
900 make_page_readonly((void *)va);
901 }
902 ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
903 }
905 /* Ring 1 stack is the initial stack. */
906 ctxt.kernel_ss = __KERNEL_DS;
907 ctxt.kernel_sp = idle->thread.esp;
909 /* Callback handlers. */
910 ctxt.event_callback_cs = __KERNEL_CS;
911 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
912 ctxt.failsafe_callback_cs = __KERNEL_CS;
913 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
915 ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
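/* The AP starts on the boot page tables; cr3 is given as a machine address. */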
917 boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
918 if (boot_error)
919 printk("boot error: %ld\n", boot_error);
921 if (!boot_error) {
922 /*
923 * allow APs to start initializing.
924 */
925 Dprintk("Before Callout %d.\n", cpu);
926 cpu_set(cpu, cpu_callout_map);
927 Dprintk("After Callout %d.\n", cpu);
929 /*
930 * Wait 5s total for a response
931 */
932 for (timeout = 0; timeout < 50000; timeout++) {
933 if (cpu_isset(cpu, cpu_callin_map))
934 break; /* It has booted */
935 udelay(100);
936 }
938 if (cpu_isset(cpu, cpu_callin_map)) {
939 /* number CPUs logically, starting from 1 (BSP is 0) */
940 Dprintk("OK.\n");
941 printk("CPU%d: ", cpu);
942 print_cpu_info(&cpu_data[cpu]);
943 Dprintk("CPU has booted.\n");
944 } else {
945 boot_error= 1;
946 }
947 }
948 x86_cpu_to_apicid[cpu] = apicid;
949 if (boot_error) {
950 /* Try to put things back the way they were before ... */
951 unmap_cpu_to_logical_apicid(cpu);
952 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
953 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
954 cpucount--;
955 }
957 #else
958 Dprintk("Setting warm reset code and vector.\n");
960 store_NMI_vector(&nmi_high, &nmi_low);
962 smpboot_setup_warm_reset_vector(start_eip);
964 /*
965 * Starting actual IPI sequence...
966 */
967 boot_error = wakeup_secondary_cpu(apicid, start_eip);
969 if (!boot_error) {
970 /*
971 * allow APs to start initializing.
972 */
973 Dprintk("Before Callout %d.\n", cpu);
974 cpu_set(cpu, cpu_callout_map);
975 Dprintk("After Callout %d.\n", cpu);
977 /*
978 * Wait 5s total for a response
979 */
980 for (timeout = 0; timeout < 50000; timeout++) {
981 if (cpu_isset(cpu, cpu_callin_map))
982 break; /* It has booted */
983 udelay(100);
984 }
986 if (cpu_isset(cpu, cpu_callin_map)) {
987 /* number CPUs logically, starting from 1 (BSP is 0) */
988 Dprintk("OK.\n");
989 printk("CPU%d: ", cpu);
990 print_cpu_info(&cpu_data[cpu]);
991 Dprintk("CPU has booted.\n");
992 } else {
993 boot_error= 1;
994 if (*((volatile unsigned char *)trampoline_base)
995 == 0xA5)
996 /* trampoline started but...? */
997 printk("Stuck ??\n");
998 else
999 /* trampoline code not run */
1000 printk("Not responding.\n");
1001 inquire_remote_apic(apicid);
1002 }
1003 }
1004 x86_cpu_to_apicid[cpu] = apicid;
1005 if (boot_error) {
1006 /* Try to put things back the way they were before ... */
1007 unmap_cpu_to_logical_apicid(cpu);
1008 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
1009 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
1010 cpucount--;
1011 }
1013 /* mark "stuck" area as not stuck */
1014 *((volatile unsigned long *)trampoline_base) = 0;
1015 #endif
1017 return boot_error;
1018 }
1020 static void smp_tune_scheduling (void)
1021 {
1022 unsigned long cachesize; /* kB */
1023 unsigned long bandwidth = 350; /* MB/s */
1024 /*
1025 * Rough estimation for SMP scheduling, this is the number of
1026 * cycles it takes for a fully memory-limited process to flush
1027 * the SMP-local cache.
1029 * (For a P5 this pretty much means we will choose another idle
1030 * CPU almost always at wakeup time (this is due to the small
1031 * L1 cache), on PIIs it's around 50-100 usecs, depending on
1032 * the cache size)
1033 */
1035 if (!cpu_khz) {
1036 /*
1037 * this basically disables processor-affinity
1038 * scheduling on SMP without a TSC.
1039 */
1040 return;
1041 } else {
1042 cachesize = boot_cpu_data.x86_cache_size;
1043 if (cachesize == -1) {
1044 cachesize = 16; /* Pentiums, 2x8kB cache */
1045 bandwidth = 100;
1046 }
1047 }
1048 }
1050 /*
1051 * Cycle through the processors sending APIC IPIs to boot each.
1052 */
1054 #if 0
1055 static int boot_cpu_logical_apicid;
1056 #endif
1057 /* Where the IO area was mapped on multiquad, always 0 otherwise */
1058 void *xquad_portio;
1060 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
1061 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
1062 EXPORT_SYMBOL(cpu_core_map);
1064 static void __init smp_boot_cpus(unsigned int max_cpus)
1065 {
1066 int cpu, kicked;
1067 unsigned long bogosum = 0;
1068 #if 0
1069 int apicid, bit;
1070 #endif
1072 /*
1073 * Setup boot CPU information
1074 */
1075 smp_store_cpu_info(0); /* Final full version of the data */
1076 printk("CPU%d: ", 0);
1077 print_cpu_info(&cpu_data[0]);
1079 #if 0
1080 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
1081 boot_cpu_logical_apicid = logical_smp_processor_id();
1082 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
1083 #else
1084 // boot_cpu_physical_apicid = 0;
1085 // boot_cpu_logical_apicid = 0;
1086 x86_cpu_to_apicid[0] = 0;
1087 #endif
1089 current_thread_info()->cpu = 0;
1090 smp_tune_scheduling();
1091 cpus_clear(cpu_sibling_map[0]);
1092 cpu_set(0, cpu_sibling_map[0]);
1094 cpus_clear(cpu_core_map[0]);
1095 cpu_set(0, cpu_core_map[0]);
1097 #ifdef CONFIG_X86_IO_APIC
1098 /*
1099 * If we couldn't find an SMP configuration at boot time,
1100 * get out of here now!
1101 */
1102 if (!smp_found_config && !acpi_lapic) {
1103 printk(KERN_NOTICE "SMP motherboard not detected.\n");
1104 smpboot_clear_io_apic_irqs();
1105 #if 0
1106 phys_cpu_present_map = physid_mask_of_physid(0);
1107 #endif
1108 #ifdef CONFIG_X86_LOCAL_APIC
1109 if (APIC_init_uniprocessor())
1110 printk(KERN_NOTICE "Local APIC not detected."
1111 " Using dummy APIC emulation.\n");
1112 #endif
1113 map_cpu_to_logical_apicid();
1114 cpu_set(0, cpu_sibling_map[0]);
1115 cpu_set(0, cpu_core_map[0]);
1116 return;
1117 }
1118 #endif
1120 #if 0
1121 /*
1122 * Should not be necessary because the MP table should list the boot
1123 * CPU too, but we do it for the sake of robustness anyway.
1124 * Makes no sense to do this check in clustered apic mode, so skip it
1125 */
1126 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
1127 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
1128 boot_cpu_physical_apicid);
1129 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1130 }
1132 /*
1133 * If we couldn't find a local APIC, then get out of here now!
1134 */
1135 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
1136 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1137 boot_cpu_physical_apicid);
1138 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
1139 smpboot_clear_io_apic_irqs();
1140 phys_cpu_present_map = physid_mask_of_physid(0);
1141 cpu_set(0, cpu_sibling_map[0]);
1142 cpu_set(0, cpu_core_map[0]);
1143 cpu_set(0, cpu_sibling_map[0]);
1144 cpu_set(0, cpu_core_map[0]);
1145 return;
1146 }
1148 verify_local_APIC();
1149 #endif
1151 /*
1152 * If SMP should be disabled, then really disable it!
1153 */
1154 if (!max_cpus) {
1155 HYPERVISOR_shared_info->n_vcpu = 1;
1156 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
1157 smpboot_clear_io_apic_irqs();
1158 #if 0
1159 phys_cpu_present_map = physid_mask_of_physid(0);
1160 #endif
1161 return;
1162 }
1164 smp_intr_init();
1166 #if 0
1167 connect_bsp_APIC();
1168 setup_local_APIC();
1169 #endif
1170 map_cpu_to_logical_apicid();
1171 #if 0
1174 setup_portio_remap();
1176 /*
1177 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
1179 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
1180 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
1181 * clustered apic ID.
1182 */
1183 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
1184 #endif
1185 Dprintk("CPU present map: %lx\n",
1186 (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
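/*
 * Bring up each additional vcpu advertised by Xen in shared_info,
 * respecting the max_cpus limit. "kicked" counts CPUs successfully
 * started, including the boot CPU.
 */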
1188 kicked = 1;
1189 for (cpu = 1; kicked < NR_CPUS &&
1190 cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
1191 if (max_cpus <= cpucount+1)
1192 continue;
1194 #ifdef CONFIG_SMP_ALTERNATIVES
1195 if (kicked == 1)
1196 prepare_for_smp();
1197 #endif
1198 if (do_boot_cpu(cpu))
1199 printk("CPU #%d not responding - cannot use it.\n",
1200 cpu);
1201 else
1202 ++kicked;
1203 }
1205 #if 0
1206 /*
1207 * Cleanup possible dangling ends...
1208 */
1209 smpboot_restore_warm_reset_vector();
1210 #endif
1212 /*
1213 * Allow the user to impress friends.
1214 */
1215 Dprintk("Before bogomips.\n");
1216 for (cpu = 0; cpu < NR_CPUS; cpu++)
1217 if (cpu_isset(cpu, cpu_callout_map))
1218 bogosum += cpu_data[cpu].loops_per_jiffy;
1219 printk(KERN_INFO
1220 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1221 cpucount+1,
1222 bogosum/(500000/HZ),
1223 (bogosum/(5000/HZ))%100);
1225 Dprintk("Before bogocount - setting activated=1.\n");
1227 if (smp_b_stepping)
1228 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1230 /*
1231 * Don't taint if we are running SMP kernel on a single non-MP
1232 * approved Athlon
1233 */
1234 if (tainted & TAINT_UNSAFE_SMP) {
1235 if (cpucount)
1236 printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
1237 else
1238 tainted &= ~TAINT_UNSAFE_SMP;
1239 }
1241 Dprintk("Boot done.\n");
1243 /*
1244 * construct cpu_sibling_map[], so that we can tell sibling CPUs
1245 * efficiently.
1246 */
1247 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1248 cpus_clear(cpu_sibling_map[cpu]);
1249 cpus_clear(cpu_core_map[cpu]);
1250 }
1252 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1253 struct cpuinfo_x86 *c = cpu_data + cpu;
1254 int siblings = 0;
1255 int i;
1256 if (!cpu_isset(cpu, cpu_callout_map))
1257 continue;
1259 if (smp_num_siblings > 1) {
1260 for (i = 0; i < NR_CPUS; i++) {
1261 if (!cpu_isset(i, cpu_callout_map))
1262 continue;
1263 if (cpu_core_id[cpu] == cpu_core_id[i]) {
1264 siblings++;
1265 cpu_set(i, cpu_sibling_map[cpu]);
1266 }
1267 }
1268 } else {
1269 siblings++;
1270 cpu_set(cpu, cpu_sibling_map[cpu]);
1271 }
1273 if (siblings != smp_num_siblings) {
1274 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1275 smp_num_siblings = siblings;
1276 }
1277 if (c->x86_num_cores > 1) {
1278 for (i = 0; i < NR_CPUS; i++) {
1279 if (!cpu_isset(i, cpu_callout_map))
1280 continue;
1281 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1282 cpu_set(i, cpu_core_map[cpu]);
1283 }
1284 }
1285 } else {
1286 cpu_core_map[cpu] = cpu_sibling_map[cpu];
1287 }
1288 }
1290 smpboot_setup_io_apic();
1292 #if 0
1293 setup_boot_APIC_clock();
1295 /*
1296 * Synchronize the TSC with the AP
1297 */
1298 if (cpu_has_tsc && cpucount && cpu_khz)
1299 synchronize_tsc_bp();
1300 #endif
1301 }
1303 /* These are wrappers to interface to the new boot process. Someone
1304 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1305 void __init smp_prepare_cpus(unsigned int max_cpus)
1306 {
1307 smp_commenced_mask = cpumask_of_cpu(0);
1308 cpu_callin_map = cpumask_of_cpu(0);
1309 mb();
1310 smp_boot_cpus(max_cpus);
1311 }
1313 void __devinit smp_prepare_boot_cpu(void)
1314 {
1315 cpu_set(smp_processor_id(), cpu_online_map);
1316 cpu_set(smp_processor_id(), cpu_callout_map);
1317 }
1319 #ifdef CONFIG_HOTPLUG_CPU
1320 #include <asm-xen/xenbus.h>
1321 /* hotplug down/up function pointer and target vcpu */
1322 struct vcpu_hotplug_handler_t {
1323 void (*fn) (int vcpu);
1324 u32 vcpu;
1325 };
1326 static struct vcpu_hotplug_handler_t vcpu_hotplug_handler;
1328 static int vcpu_hotplug_cpu_process(void *unused)
1329 {
1330 struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
1332 if (handler->fn) {
1333 (*(handler->fn)) (handler->vcpu);
1334 handler->fn = NULL;
1335 }
1336 return 0;
1337 }
1339 static void __vcpu_hotplug_handler(void *unused)
1340 {
1341 int err;
1343 err = kernel_thread(vcpu_hotplug_cpu_process,
1344 NULL, CLONE_FS | CLONE_FILES);
1345 if (err < 0)
1346 printk(KERN_ALERT "Error creating hotplug_cpu process!\n");
1347 }
1349 static void handle_vcpu_hotplug_event(struct xenbus_watch *, const char *);
1350 static struct notifier_block xsn_cpu;
1352 /* xenbus watch struct */
1353 static struct xenbus_watch cpu_watch = {
1354 .node = "cpu",
1355 .callback = handle_vcpu_hotplug_event
1356 };
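/*
 * The watch fires for any change under the "cpu" subtree of xenstore;
 * the handler below ignores everything except ".../availability" and
 * reads its value ("online" or "offline") to decide whether to call
 * cpu_up() or cpu_down() from a kernel thread.
 */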
1358 /* NB: Assumes xenbus_lock is held! */
1359 static int setup_cpu_watcher(struct notifier_block *notifier,
1360 unsigned long event, void *data)
1361 {
1362 int err = 0;
1364 BUG_ON(down_trylock(&xenbus_lock) == 0);
1365 err = register_xenbus_watch(&cpu_watch);
1367 if (err) {
1368 printk("Failed to register watch on /cpu\n");
1369 }
1371 return NOTIFY_DONE;
1372 }
1374 static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, const char *node)
1375 {
1376 static DECLARE_WORK(vcpu_hotplug_work, __vcpu_hotplug_handler, NULL);
1377 struct vcpu_hotplug_handler_t *handler = &vcpu_hotplug_handler;
1378 ssize_t ret;
1379 int err, cpu;
1380 char state[8];
1381 char dir[32];
1382 char *cpustr;
1384 /* get a pointer to start of cpu string */
1385 if ((cpustr = strstr(node, "cpu/")) != NULL) {
1387 /* find which cpu state changed, note vcpu for handler */
1388 sscanf(cpustr, "cpu/%d", &cpu);
1389 handler->vcpu = cpu;
1391 /* calc the dir for xenbus read */
1392 sprintf(dir, "cpu/%d", cpu);
1394 /* make sure the watch that was triggered is a change to the correct key */
1395 if ((strcmp(node + strlen(dir), "/availability")) != 0)
1396 return;
1398 /* get the state value */
1399 xenbus_transaction_start("cpu");
1400 err = xenbus_scanf(dir, "availability", "%s", state);
1401 xenbus_transaction_end(0);
1403 if (err != 1) {
1404 printk(KERN_ERR
1405 "XENBUS: Unable to read cpu state\n");
1406 return;
1407 }
1409 /* if we detect a state change, take action */
1410 if (strcmp(state, "online") == 0) {
1411 /* offline -> online */
1412 if (!cpu_isset(cpu, cpu_online_map)) {
1413 handler->fn = (void *)&cpu_up;
1414 ret = schedule_work(&vcpu_hotplug_work);
1415 }
1416 } else if (strcmp(state, "offline") == 0) {
1417 /* online -> offline */
1418 if (cpu_isset(cpu, cpu_online_map)) {
1419 handler->fn = (void *)&cpu_down;
1420 ret = schedule_work(&vcpu_hotplug_work);
1421 }
1422 } else {
1423 printk(KERN_ERR
1424 "XENBUS: unknown state(%s) on node(%s)\n", state,
1425 node);
1426 }
1427 }
1428 return;
1429 }
1431 static int __init setup_vcpu_hotplug_event(void)
1432 {
1433 xsn_cpu.notifier_call = setup_cpu_watcher;
1435 register_xenstore_notifier(&xsn_cpu);
1437 return 0;
1438 }
1440 subsys_initcall(setup_vcpu_hotplug_event);
1442 /* must be called with the cpucontrol mutex held */
1443 static int __devinit cpu_enable(unsigned int cpu)
1444 {
1445 #ifdef CONFIG_SMP_ALTERNATIVES
1446 if (num_online_cpus() == 1)
1447 prepare_for_smp();
1448 #endif
1450 /* get the target out of its holding state */
1451 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1452 wmb();
1454 /* wait for the processor to ack it. timeout? */
1455 while (!cpu_online(cpu))
1456 cpu_relax();
1458 fixup_irqs(cpu_online_map);
1460 /* counter the disable in fixup_irqs() */
1461 local_irq_enable();
1462 return 0;
1463 }
1465 int __cpu_disable(void)
1466 {
1467 cpumask_t map = cpu_online_map;
1468 int cpu = smp_processor_id();
1470 /*
1471 * Perhaps use cpufreq to drop frequency, but that could go
1472 * into generic code.
1474 * We won't take down the boot processor on i386 due to some
1475 * interrupts only being able to be serviced by the BSP.
1476 * Especially so if we're not using an IOAPIC -zwane
1477 */
1478 if (cpu == 0)
1479 return -EBUSY;
1481 cpu_clear(cpu, map);
1482 fixup_irqs(map);
1484 /* It's now safe to remove this processor from the online map */
1485 cpu_clear(cpu, cpu_online_map);
1487 #ifdef CONFIG_SMP_ALTERNATIVES
1488 if (num_online_cpus() == 1)
1489 unprepare_for_smp();
1490 #endif
1492 return 0;
1493 }
1495 void __cpu_die(unsigned int cpu)
1496 {
1497 /* We don't do anything here: idle task is faking death itself. */
1498 unsigned int i;
1500 for (i = 0; i < 10; i++) {
1501 /* They ack this in play_dead by setting CPU_DEAD */
1502 if (per_cpu(cpu_state, cpu) == CPU_DEAD)
1503 return;
1504 current->state = TASK_UNINTERRUPTIBLE;
1505 schedule_timeout(HZ/10);
1506 }
1507 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1508 }
1510 #else /* ... !CONFIG_HOTPLUG_CPU */
1511 int __cpu_disable(void)
1512 {
1513 return -ENOSYS;
1514 }
1516 void __cpu_die(unsigned int cpu)
1517 {
1518 /* We said "no" in __cpu_disable */
1519 BUG();
1520 }
1521 #endif /* CONFIG_HOTPLUG_CPU */
1523 int __devinit __cpu_up(unsigned int cpu)
1524 {
1525 /* In case one didn't come up */
1526 if (!cpu_isset(cpu, cpu_callin_map)) {
1527 printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
1528 local_irq_enable();
1529 return -EIO;
1530 }
1532 #ifdef CONFIG_HOTPLUG_CPU
1533 #ifdef CONFIG_XEN
1534 /* Tell hypervisor to bring vcpu up. */
1535 HYPERVISOR_vcpu_up(cpu);
1536 #endif
1537 /* Already up, and in cpu_quiescent now? */
1538 if (cpu_isset(cpu, smp_commenced_mask)) {
1539 cpu_enable(cpu);
1540 return 0;
1541 }
1542 #endif
1544 local_irq_enable();
1545 /* Unleash the CPU! */
1546 cpu_set(cpu, smp_commenced_mask);
1547 while (!cpu_isset(cpu, cpu_online_map))
1548 mb();
1549 return 0;
1550 }
1552 void __init smp_cpus_done(unsigned int max_cpus)
1553 {
1554 #if 1
1555 #else
1556 #ifdef CONFIG_X86_IO_APIC
1557 setup_ioapic_dest();
1558 #endif
1559 zap_low_mappings();
1560 /*
1561 * Disable executability of the SMP trampoline:
1562 */
1563 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1564 #endif
1565 }
1567 extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
1568 extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
1570 void smp_intr_init(void)
1571 {
1572 int cpu = smp_processor_id();
1574 per_cpu(resched_irq, cpu) =
1575 bind_ipi_to_irq(RESCHEDULE_VECTOR);
1576 sprintf(resched_name[cpu], "resched%d", cpu);
1577 BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
1578 SA_INTERRUPT, resched_name[cpu], NULL));
1580 per_cpu(callfunc_irq, cpu) =
1581 bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
1582 sprintf(callfunc_name[cpu], "callfunc%d", cpu);
1583 BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
1584 smp_call_function_interrupt,
1585 SA_INTERRUPT, callfunc_name[cpu], NULL));
1586 }
1588 static void smp_intr_exit(void)
1589 {
1590 int cpu = smp_processor_id();
1592 free_irq(per_cpu(resched_irq, cpu), NULL);
1593 unbind_ipi_from_irq(RESCHEDULE_VECTOR);
1595 free_irq(per_cpu(callfunc_irq, cpu), NULL);
1596 unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
1597 }
1599 extern void local_setup_timer_irq(void);
1600 extern void local_teardown_timer_irq(void);
1602 void smp_suspend(void)
1603 {
1604 /* XXX todo: take down time and ipi's on all cpus */
1605 local_teardown_timer_irq();
1606 smp_intr_exit();
1607 }
1609 void smp_resume(void)
1610 {
1611 /* XXX todo: restore time and ipi's on all cpus */
1612 smp_intr_init();
1613 local_setup_timer_irq();
1614 }
1616 DECLARE_PER_CPU(int, timer_irq);
1618 void _restore_vcpu(void)
1619 {
1620 int cpu = smp_processor_id();
1621 extern atomic_t vcpus_rebooting;
1623 /* We are the first thing the vcpu runs when it comes back,
1624 and we are supposed to restore the IPIs and timer
1625 interrupts etc. When we return, the vcpu's idle loop will
1626 start up again. */
1627 _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
1628 _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
1629 _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
1630 _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
1631 atomic_dec(&vcpus_rebooting);
1632 }