ia64/xen-unstable

view linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c @ 4828:4f1ddb677017

bitkeeper revision 1.1389.5.52 (4280b9beG5xwTCb0TZxrZ0Cx_ku3fQ)

smpboot.c:
start_secondary doesn't return anything.
Dprintk gets defined in apic.h now.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Tue May 10 13:40:14 2005 +0000 (2005-05-10)
parents 1311d7d58c41
children 6d36a84988c8
line source
1 /*
2 * x86 SMP booting functions
3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
6 *
7 * Much of the core SMP work is based on previous work by Thomas Radke, to
8 * whom a great many thanks are extended.
9 *
10 * Thanks to Intel for making available several different Pentium,
11 * Pentium Pro and Pentium-II/Xeon MP machines.
12 * Original development of Linux SMP code supported by Caldera.
13 *
14 * This code is released under the GNU General Public License version 2 or
15 * later.
16 *
17 * Fixes
18 * Felix Koop : NR_CPUS used properly
19 * Jose Renau : Handle single CPU case.
20 * Alan Cox : By repeated request 8) - Total BogoMIPS report.
21 * Greg Wright : Fix for kernel stacks panic.
22 * Erich Boleyn : MP v1.4 and additional changes.
23 * Matthias Sattler : Changes for 2.1 kernel map.
24 * Michel Lespinasse : Changes for 2.1 kernel map.
25 * Michael Chastain : Change trampoline.S to gnu as.
26 * Alan Cox : Dumb bug: 'B' step PPro's are fine
27 * Ingo Molnar : Added APIC timers, based on code
28 * from Jose Renau
29 * Ingo Molnar : various cleanups and rewrites
30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
32 * Martin J. Bligh : Added support for multi-quad systems
33 * Dave Jones : Report invalid combinations of Athlon CPUs.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
35 */
36 #include <linux/module.h>
37 #include <linux/config.h>
38 #include <linux/init.h>
39 #include <linux/kernel.h>
41 #include <linux/mm.h>
42 #include <linux/sched.h>
43 #include <linux/kernel_stat.h>
44 #include <linux/smp_lock.h>
45 #include <linux/irq.h>
46 #include <linux/bootmem.h>
48 #include <linux/delay.h>
49 #include <linux/mc146818rtc.h>
50 #include <asm/tlbflush.h>
51 #include <asm/desc.h>
52 #include <asm/arch_hooks.h>
54 #include <mach_wakecpu.h>
55 #include <smpboot_hooks.h>
57 /* Set if we find a B stepping CPU */
58 static int __initdata smp_b_stepping;
60 /* Number of siblings per CPU package */
61 int smp_num_siblings = 1;
62 int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
63 EXPORT_SYMBOL(phys_proc_id);
65 /* bitmap of online cpus */
66 cpumask_t cpu_online_map;
68 cpumask_t cpu_callin_map;
69 cpumask_t cpu_callout_map;
70 static cpumask_t smp_commenced_mask;
72 /* Per CPU bogomips and other parameters */
73 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
75 u8 x86_cpu_to_apicid[NR_CPUS] =
76 { [0 ... NR_CPUS-1] = 0xff };
77 EXPORT_SYMBOL(x86_cpu_to_apicid);
79 /* Set when the idlers are all forked */
80 int smp_threads_ready;
82 #if 0
83 /*
84 * Trampoline 80x86 program as an array.
85 */
87 extern unsigned char trampoline_data [];
88 extern unsigned char trampoline_end [];
89 static unsigned char *trampoline_base;
90 static int trampoline_exec;
92 /*
93 * Currently trivial. Write the real->protected mode
94 * bootstrap into the page concerned. The caller
95 * has made sure it's suitably aligned.
96 */
98 static unsigned long __init setup_trampoline(void)
99 {
100 memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
101 return virt_to_phys(trampoline_base);
102 }
103 #endif
105 /*
106 * We are called very early to get the low memory for the
107 * SMP bootup trampoline page.
108 */
109 void __init smp_alloc_memory(void)
110 {
111 #if 1
112 int cpu;
114 for (cpu = 1; cpu < NR_CPUS; cpu++) {
115 cpu_gdt_descr[cpu].address = (unsigned long)
116 alloc_bootmem_low_pages(PAGE_SIZE);
117 /* XXX free unused pages later */
118 }
119 #else
120 trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
121 /*
122 * Has to be in very low memory so we can execute
123 * real-mode AP code.
124 */
125 if (__pa(trampoline_base) >= 0x9F000)
126 BUG();
127 /*
128 * Make the SMP trampoline executable:
129 */
130 trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
131 #endif
132 }
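/*
 * Note on the #if 1 path above: under Xen a secondary VCPU is started
 * directly in protected mode via HYPERVISOR_boot_vcpu() (see
 * do_boot_cpu() below), so no real-mode trampoline is needed; the only
 * early per-CPU resource allocated here is a page for each CPU's GDT.
 * The #else branch is the unused native trampoline setup.
 */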
134 /*
135 * The bootstrap kernel entry code has set these up. Save them for
136 * a given CPU
137 */
139 static void __init smp_store_cpu_info(int id)
140 {
141 struct cpuinfo_x86 *c = cpu_data + id;
143 *c = boot_cpu_data;
144 if (id!=0)
145 identify_cpu(c);
146 /*
147 * Mask B, Pentium, but not Pentium MMX
148 */
149 if (c->x86_vendor == X86_VENDOR_INTEL &&
150 c->x86 == 5 &&
151 c->x86_mask >= 1 && c->x86_mask <= 4 &&
152 c->x86_model <= 3)
153 /*
154 * Remember we have B step Pentia with bugs
155 */
156 smp_b_stepping = 1;
158 /*
159 * Certain Athlons might work (for various values of 'work') in SMP
160 * but they are not certified as MP capable.
161 */
162 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
164 /* Athlon 660/661 is valid. */
165 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
166 goto valid_k7;
168 /* Duron 670 is valid */
169 if ((c->x86_model==7) && (c->x86_mask==0))
170 goto valid_k7;
172 /*
173 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
174 * It's worth noting that the A5 stepping (662) of some Athlon XP's
175 * have the MP bit set.
176 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
177 */
178 if (((c->x86_model==6) && (c->x86_mask>=2)) ||
179 ((c->x86_model==7) && (c->x86_mask>=1)) ||
180 (c->x86_model> 7))
181 if (cpu_has_mp)
182 goto valid_k7;
184 /* If we get here, it's not a certified SMP capable AMD system. */
185 tainted |= TAINT_UNSAFE_SMP;
186 }
188 valid_k7:
189 ;
190 }
192 #if 0
193 /*
194 * TSC synchronization.
195 *
196 * We first check whether all CPUs have their TSC's synchronized,
197 * then we print a warning if not, and always resync.
198 */
200 static atomic_t tsc_start_flag = ATOMIC_INIT(0);
201 static atomic_t tsc_count_start = ATOMIC_INIT(0);
202 static atomic_t tsc_count_stop = ATOMIC_INIT(0);
203 static unsigned long long tsc_values[NR_CPUS];
205 #define NR_LOOPS 5
207 static void __init synchronize_tsc_bp (void)
208 {
209 int i;
210 unsigned long long t0;
211 unsigned long long sum, avg;
212 long long delta;
213 unsigned long one_usec;
214 int buggy = 0;
216 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
218 /* convert from kcyc/sec to cyc/usec */
219 one_usec = cpu_khz / 1000;
221 atomic_set(&tsc_start_flag, 1);
222 wmb();
224 /*
225 * We loop a few times to get a primed instruction cache,
226 * then the last pass is more or less synchronized and
227 * the BP and APs set their cycle counters to zero all at
228 * once. This reduces the chance of having random offsets
229 * between the processors, and guarantees that the maximum
230 * delay between the cycle counters is never bigger than
231 * the latency of information-passing (cachelines) between
232 * two CPUs.
233 */
234 for (i = 0; i < NR_LOOPS; i++) {
235 /*
236 * all APs synchronize but they loop on '== num_cpus'
237 */
238 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
239 mb();
240 atomic_set(&tsc_count_stop, 0);
241 wmb();
242 /*
243 * this lets the APs save their current TSC:
244 */
245 atomic_inc(&tsc_count_start);
247 rdtscll(tsc_values[smp_processor_id()]);
248 /*
249 * We clear the TSC in the last loop:
250 */
251 if (i == NR_LOOPS-1)
252 write_tsc(0, 0);
254 /*
255 * Wait for all APs to leave the synchronization point:
256 */
257 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
258 mb();
259 atomic_set(&tsc_count_start, 0);
260 wmb();
261 atomic_inc(&tsc_count_stop);
262 }
264 sum = 0;
265 for (i = 0; i < NR_CPUS; i++) {
266 if (cpu_isset(i, cpu_callout_map)) {
267 t0 = tsc_values[i];
268 sum += t0;
269 }
270 }
271 avg = sum;
272 do_div(avg, num_booting_cpus());
274 sum = 0;
275 for (i = 0; i < NR_CPUS; i++) {
276 if (!cpu_isset(i, cpu_callout_map))
277 continue;
278 delta = tsc_values[i] - avg;
279 if (delta < 0)
280 delta = -delta;
281 /*
282 * We report clock differences bigger than 2 microseconds.
283 */
284 if (delta > 2*one_usec) {
285 long realdelta;
286 if (!buggy) {
287 buggy = 1;
288 printk("\n");
289 }
290 realdelta = delta;
291 do_div(realdelta, one_usec);
292 if (tsc_values[i] < avg)
293 realdelta = -realdelta;
295 printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
296 }
298 sum += delta;
299 }
300 if (!buggy)
301 printk("passed.\n");
302 }
304 static void __init synchronize_tsc_ap (void)
305 {
306 int i;
308 /*
309 * Not every cpu is online at the time
310 * this gets called, so we first wait for the BP to
311 * finish SMP initialization:
312 */
313 while (!atomic_read(&tsc_start_flag)) mb();
315 for (i = 0; i < NR_LOOPS; i++) {
316 atomic_inc(&tsc_count_start);
317 while (atomic_read(&tsc_count_start) != num_booting_cpus())
318 mb();
320 rdtscll(tsc_values[smp_processor_id()]);
321 if (i == NR_LOOPS-1)
322 write_tsc(0, 0);
324 atomic_inc(&tsc_count_stop);
325 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
326 }
327 }
328 #undef NR_LOOPS
329 #endif
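/*
 * The native TSC synchronization above is compiled out in this tree;
 * presumably the paravirtualized time interface provided by the Xen
 * hypervisor makes a guest-side TSC resync unnecessary here.
 */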
331 extern void calibrate_delay(void);
333 static atomic_t init_deasserted;
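/*
 * Boot handshake: do_boot_cpu() marks the new CPU in cpu_callout_map
 * once it has been kicked, the AP waits (up to 2s) in smp_callin() for
 * that bit, and then acknowledges by setting itself in cpu_callin_map,
 * which do_boot_cpu() in turn polls for roughly 5s.
 */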
335 void __init smp_callin(void)
336 {
337 int cpuid, phys_id;
338 unsigned long timeout;
340 #if 0
341 /*
342 * If woken up by an INIT in an 82489DX configuration
343 * we may get here before an INIT-deassert IPI reaches
344 * our local APIC. We have to wait for the IPI or we'll
345 * lock up on an APIC access.
346 */
347 wait_for_init_deassert(&init_deasserted);
348 #endif
350 /*
351 * (This works even if the APIC is not enabled.)
352 */
353 phys_id = smp_processor_id();
354 cpuid = smp_processor_id();
355 if (cpu_isset(cpuid, cpu_callin_map)) {
356 printk("huh, phys CPU#%d, CPU#%d already present??\n",
357 phys_id, cpuid);
358 BUG();
359 }
360 Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
362 /*
363 * STARTUP IPIs are fragile beasts as they might sometimes
364 * trigger some glue motherboard logic. Complete APIC bus
365 * silence for 1 second, this overestimates the time the
366 * boot CPU is spending to send the up to 2 STARTUP IPIs
367 * by a factor of two. This should be enough.
368 */
370 /*
371 * Waiting 2s total for startup (udelay is not yet working)
372 */
373 timeout = jiffies + 2*HZ;
374 while (time_before(jiffies, timeout)) {
375 /*
376 * Has the boot CPU finished its STARTUP sequence?
377 */
378 if (cpu_isset(cpuid, cpu_callout_map))
379 break;
380 rep_nop();
381 }
383 if (!time_before(jiffies, timeout)) {
384 printk("BUG: CPU%d started up but did not get a callout!\n",
385 cpuid);
386 BUG();
387 }
389 #if 0
390 /*
391 * the boot CPU has finished the init stage and is spinning
392 * on callin_map until we finish. We are free to set up this
393 * CPU, first the APIC. (this is probably redundant on most
394 * boards)
395 */
397 Dprintk("CALLIN, before setup_local_APIC().\n");
398 smp_callin_clear_local_apic();
399 setup_local_APIC();
400 #endif
401 map_cpu_to_logical_apicid();
403 /*
404 * Get our bogomips.
405 */
406 calibrate_delay();
407 Dprintk("Stack at about %p\n",&cpuid);
409 /*
410 * Save our processor parameters
411 */
412 smp_store_cpu_info(cpuid);
414 #if 0
415 disable_APIC_timer();
416 #endif
418 /*
419 * Allow the master to continue.
420 */
421 cpu_set(cpuid, cpu_callin_map);
423 #if 0
424 /*
425 * Synchronize the TSC with the BP
426 */
427 if (cpu_has_tsc && cpu_khz)
428 synchronize_tsc_ap();
429 #endif
430 }
432 int cpucount;
435 static irqreturn_t ldebug_interrupt(
436 int irq, void *dev_id, struct pt_regs *regs)
437 {
438 return IRQ_HANDLED;
439 }
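/*
 * Xen-specific debug interrupt: ldebug_setup() binds the VIRQ_DEBUG
 * virtual IRQ to a dynamically allocated irq on the calling CPU and
 * installs the (no-op) handler above for it.
 */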
441 static DEFINE_PER_CPU(int, ldebug_irq);
442 static char ldebug_name[NR_CPUS][15];
444 void ldebug_setup(void)
445 {
446 int cpu = smp_processor_id();
448 per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
449 sprintf(ldebug_name[cpu], "ldebug%d", cpu);
450 BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
451 SA_INTERRUPT, ldebug_name[cpu], NULL));
452 }
455 extern void local_setup_timer(void);
457 /*
458 * Activate a secondary processor.
459 */
460 static void __init start_secondary(void *unused)
461 {
462 /*
463 * Don't put anything before smp_callin(); SMP
464 * booting is so fragile that we want to limit the
465 * things done here to the most necessary things.
466 */
467 cpu_init();
468 smp_callin();
469 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
470 rep_nop();
471 local_setup_timer();
472 ldebug_setup();
473 smp_intr_init();
474 local_irq_enable();
475 /*
476 * low-memory mappings have been cleared, flush them from
477 * the local TLBs too.
478 */
479 local_flush_tlb();
480 cpu_set(smp_processor_id(), cpu_online_map);
482 /* We can take interrupts now: we're officially "up". */
483 local_irq_enable();
485 wmb();
486 if (0) {
487 char *msg2 = "delay2\n";
488 int timeout;
489 for (timeout = 0; timeout < 50000; timeout++) {
490 udelay(1000);
491 if (timeout == 2000) {
492 (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2);
493 timeout = 0;
494 }
495 }
496 }
497 cpu_idle();
498 }
500 /*
501 * Everything has been set up for the secondary
502 * CPUs - they just need to reload everything
503 * from the task structure.
504 * This function must not return.
505 */
506 void __init initialize_secondary(void)
507 {
508 /*
509 * We don't actually need to load the full TSS,
510 * basically just the stack pointer and the eip.
511 */
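/*
 * The asm below loads the stack pointer and jumps to the eip saved in
 * the idle task's thread struct; do_boot_cpu() set that eip to
 * start_secondary(), so this function never returns.
 */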
513 asm volatile(
514 "movl %0,%%esp\n\t"
515 "jmp *%1"
516 :
517 :"r" (current->thread.esp),"r" (current->thread.eip));
518 }
520 extern struct {
521 void * esp;
522 unsigned short ss;
523 } stack_start;
525 #ifdef CONFIG_NUMA
527 /* which logical CPUs are on which nodes */
528 cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
529 { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
530 /* which node each logical CPU is on */
531 int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
532 EXPORT_SYMBOL(cpu_2_node);
534 /* set up a mapping between cpu and node. */
535 static inline void map_cpu_to_node(int cpu, int node)
536 {
537 printk("Mapping cpu %d to node %d\n", cpu, node);
538 cpu_set(cpu, node_2_cpu_mask[node]);
539 cpu_2_node[cpu] = node;
540 }
542 /* undo a mapping between cpu and node. */
543 static inline void unmap_cpu_to_node(int cpu)
544 {
545 int node;
547 printk("Unmapping cpu %d from all nodes\n", cpu);
548 for (node = 0; node < MAX_NUMNODES; node ++)
549 cpu_clear(cpu, node_2_cpu_mask[node]);
550 cpu_2_node[cpu] = 0;
551 }
552 #else /* !CONFIG_NUMA */
554 #define map_cpu_to_node(cpu, node) ({})
555 #define unmap_cpu_to_node(cpu) ({})
557 #endif /* CONFIG_NUMA */
559 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
561 void map_cpu_to_logical_apicid(void)
562 {
563 int cpu = smp_processor_id();
564 int apicid = smp_processor_id();
566 cpu_2_logical_apicid[cpu] = apicid;
567 map_cpu_to_node(cpu, apicid_to_node(apicid));
568 }
570 void unmap_cpu_to_logical_apicid(int cpu)
571 {
572 cpu_2_logical_apicid[cpu] = BAD_APICID;
573 unmap_cpu_to_node(cpu);
574 }
576 #if APIC_DEBUG
577 static inline void __inquire_remote_apic(int apicid)
578 {
579 int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
580 char *names[] = { "ID", "VERSION", "SPIV" };
581 int timeout, status;
583 printk("Inquiring remote APIC #%d...\n", apicid);
585 for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
586 printk("... APIC #%d %s: ", apicid, names[i]);
588 /*
589 * Wait for idle.
590 */
591 apic_wait_icr_idle();
593 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
594 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
596 timeout = 0;
597 do {
598 udelay(100);
599 status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
600 } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
602 switch (status) {
603 case APIC_ICR_RR_VALID:
604 status = apic_read(APIC_RRR);
605 printk("%08x\n", status);
606 break;
607 default:
608 printk("failed\n");
609 }
610 }
611 }
612 #endif
614 #if 0
615 #ifdef WAKE_SECONDARY_VIA_NMI
616 /*
617 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
618 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
619 * won't ... remember to clear down the APIC, etc later.
620 */
621 static int __init
622 wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
623 {
624 unsigned long send_status = 0, accept_status = 0;
625 int timeout, maxlvt;
627 /* Target chip */
628 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
630 /* Boot on the stack */
631 /* Kick the second */
632 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
634 Dprintk("Waiting for send to finish...\n");
635 timeout = 0;
636 do {
637 Dprintk("+");
638 udelay(100);
639 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
640 } while (send_status && (timeout++ < 1000));
642 /*
643 * Give the other CPU some time to accept the IPI.
644 */
645 udelay(200);
646 /*
647 * Due to the Pentium erratum 3AP.
648 */
649 maxlvt = get_maxlvt();
650 if (maxlvt > 3) {
651 apic_read_around(APIC_SPIV);
652 apic_write(APIC_ESR, 0);
653 }
654 accept_status = (apic_read(APIC_ESR) & 0xEF);
655 Dprintk("NMI sent.\n");
657 if (send_status)
658 printk("APIC never delivered???\n");
659 if (accept_status)
660 printk("APIC delivery error (%lx).\n", accept_status);
662 return (send_status | accept_status);
663 }
664 #endif /* WAKE_SECONDARY_VIA_NMI */
666 #ifdef WAKE_SECONDARY_VIA_INIT
667 static int __init
668 wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
669 {
670 unsigned long send_status = 0, accept_status = 0;
671 int maxlvt, timeout, num_starts, j;
673 /*
674 * Be paranoid about clearing APIC errors.
675 */
676 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
677 apic_read_around(APIC_SPIV);
678 apic_write(APIC_ESR, 0);
679 apic_read(APIC_ESR);
680 }
682 Dprintk("Asserting INIT.\n");
684 /*
685 * Turn INIT on target chip
686 */
687 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
689 /*
690 * Send IPI
691 */
692 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
693 | APIC_DM_INIT);
695 Dprintk("Waiting for send to finish...\n");
696 timeout = 0;
697 do {
698 Dprintk("+");
699 udelay(100);
700 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
701 } while (send_status && (timeout++ < 1000));
703 mdelay(10);
705 Dprintk("Deasserting INIT.\n");
707 /* Target chip */
708 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
710 /* Send IPI */
711 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
713 Dprintk("Waiting for send to finish...\n");
714 timeout = 0;
715 do {
716 Dprintk("+");
717 udelay(100);
718 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
719 } while (send_status && (timeout++ < 1000));
721 atomic_set(&init_deasserted, 1);
723 /*
724 * Should we send STARTUP IPIs ?
725 *
726 * Determine this based on the APIC version.
727 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
728 */
729 if (APIC_INTEGRATED(apic_version[phys_apicid]))
730 num_starts = 2;
731 else
732 num_starts = 0;
734 /*
735 * Run STARTUP IPI loop.
736 */
737 Dprintk("#startup loops: %d.\n", num_starts);
739 maxlvt = get_maxlvt();
741 for (j = 1; j <= num_starts; j++) {
742 Dprintk("Sending STARTUP #%d.\n",j);
743 apic_read_around(APIC_SPIV);
744 apic_write(APIC_ESR, 0);
745 apic_read(APIC_ESR);
746 Dprintk("After apic_write.\n");
748 /*
749 * STARTUP IPI
750 */
752 /* Target chip */
753 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
755 /* Boot on the stack */
756 /* Kick the second */
757 apic_write_around(APIC_ICR, APIC_DM_STARTUP
758 | (start_eip >> 12));
760 /*
761 * Give the other CPU some time to accept the IPI.
762 */
763 udelay(300);
765 Dprintk("Startup point 1.\n");
767 Dprintk("Waiting for send to finish...\n");
768 timeout = 0;
769 do {
770 Dprintk("+");
771 udelay(100);
772 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
773 } while (send_status && (timeout++ < 1000));
775 /*
776 * Give the other CPU some time to accept the IPI.
777 */
778 udelay(200);
779 /*
780 * Due to the Pentium erratum 3AP.
781 */
782 if (maxlvt > 3) {
783 apic_read_around(APIC_SPIV);
784 apic_write(APIC_ESR, 0);
785 }
786 accept_status = (apic_read(APIC_ESR) & 0xEF);
787 if (send_status || accept_status)
788 break;
789 }
790 Dprintk("After Startup.\n");
792 if (send_status)
793 printk("APIC never delivered???\n");
794 if (accept_status)
795 printk("APIC delivery error (%lx).\n", accept_status);
797 return (send_status | accept_status);
798 }
799 #endif /* WAKE_SECONDARY_VIA_INIT */
800 #endif
802 extern cpumask_t cpu_initialized;
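/*
 * Xen replaces the native INIT/INIT/STARTUP IPI dance (compiled out
 * above) with a single hypercall: do_boot_cpu() builds a
 * vcpu_guest_context_t describing the new VCPU -- kernel segments, eip
 * at startup_32_smp, the per-CPU GDT expressed as machine frame
 * numbers, the ring-1 kernel stack, the event/failsafe callbacks and
 * pt_base -- and hands it to HYPERVISOR_boot_vcpu().
 */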
804 static int __init do_boot_cpu(int apicid)
805 /*
806 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
807 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
808 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
809 */
810 {
811 struct task_struct *idle;
812 unsigned long boot_error;
813 int timeout, cpu;
814 unsigned long start_eip;
815 #if 0
816 unsigned short nmi_high = 0, nmi_low = 0;
817 #endif
818 vcpu_guest_context_t ctxt;
819 extern void startup_32_smp(void);
820 extern void hypervisor_callback(void);
821 extern void failsafe_callback(void);
822 extern int smp_trap_init(trap_info_t *);
823 int i;
825 cpu = ++cpucount;
826 /*
827 * We can't use kernel_thread since we must avoid
828 * rescheduling the child.
829 */
830 idle = fork_idle(cpu);
831 if (IS_ERR(idle))
832 panic("failed fork for CPU %d", cpu);
833 idle->thread.eip = (unsigned long) start_secondary;
834 /* start_eip had better be page-aligned! */
835 start_eip = (unsigned long)startup_32_smp;
837 /* So we see what's up */
838 printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
839 /* Stack for startup_32 can be just as for start_secondary onwards */
840 stack_start.esp = (void *) idle->thread.esp;
842 irq_ctx_init(cpu);
844 /*
845 * This grunge runs the startup process for
846 * the targeted processor.
847 */
849 atomic_set(&init_deasserted, 0);
851 #if 1
852 if (cpu_gdt_descr[0].size > PAGE_SIZE)
853 BUG();
854 cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
855 memcpy((void *)cpu_gdt_descr[cpu].address,
856 (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
857 memset((char *)cpu_gdt_descr[cpu].address +
858 FIRST_RESERVED_GDT_ENTRY * 8, 0,
859 NR_RESERVED_GDT_ENTRIES * 8);
861 memset(&ctxt, 0, sizeof(ctxt));
863 ctxt.user_regs.ds = __USER_DS;
864 ctxt.user_regs.es = __USER_DS;
865 ctxt.user_regs.fs = 0;
866 ctxt.user_regs.gs = 0;
867 ctxt.user_regs.ss = __KERNEL_DS;
868 ctxt.user_regs.cs = __KERNEL_CS;
869 ctxt.user_regs.eip = start_eip;
870 ctxt.user_regs.esp = idle->thread.esp;
871 ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
873 /* FPU is set up to default initial state. */
874 memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
876 /* Virtual IDT is empty at start-of-day. */
877 for ( i = 0; i < 256; i++ )
878 {
879 ctxt.trap_ctxt[i].vector = i;
880 ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
881 }
882 ctxt.fast_trap_idx = smp_trap_init(ctxt.trap_ctxt);
884 /* No LDT. */
885 ctxt.ldt_ents = 0;
887 {
888 unsigned long va;
889 int f;
891 for (va = cpu_gdt_descr[cpu].address, f = 0;
892 va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
893 va += PAGE_SIZE, f++) {
894 ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
895 make_page_readonly((void *)va);
896 }
897 ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
898 }
900 /* Ring 1 stack is the initial stack. */
901 ctxt.kernel_ss = __KERNEL_DS;
902 ctxt.kernel_sp = idle->thread.esp;
904 /* Callback handlers. */
905 ctxt.event_callback_cs = __KERNEL_CS;
906 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
907 ctxt.failsafe_callback_cs = __KERNEL_CS;
908 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
910 ctxt.pt_base = (unsigned long)virt_to_machine(swapper_pg_dir);
912 boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
914 if (!boot_error) {
915 /*
916 * allow APs to start initializing.
917 */
918 Dprintk("Before Callout %d.\n", cpu);
919 cpu_set(cpu, cpu_callout_map);
920 Dprintk("After Callout %d.\n", cpu);
922 /*
923 * Wait 5s total for a response
924 */
925 for (timeout = 0; timeout < 50000; timeout++) {
926 if (cpu_isset(cpu, cpu_callin_map))
927 break; /* It has booted */
928 udelay(100);
929 }
931 if (cpu_isset(cpu, cpu_callin_map)) {
932 /* number CPUs logically, starting from 1 (BSP is 0) */
933 Dprintk("OK.\n");
934 printk("CPU%d: ", cpu);
935 print_cpu_info(&cpu_data[cpu]);
936 Dprintk("CPU has booted.\n");
937 } else {
938 boot_error= 1;
939 }
940 }
941 x86_cpu_to_apicid[cpu] = apicid;
942 if (boot_error) {
943 /* Try to put things back the way they were before ... */
944 unmap_cpu_to_logical_apicid(cpu);
945 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
946 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
947 cpucount--;
948 }
950 #else
951 Dprintk("Setting warm reset code and vector.\n");
953 store_NMI_vector(&nmi_high, &nmi_low);
955 smpboot_setup_warm_reset_vector(start_eip);
957 /*
958 * Starting actual IPI sequence...
959 */
960 boot_error = wakeup_secondary_cpu(apicid, start_eip);
962 if (!boot_error) {
963 /*
964 * allow APs to start initializing.
965 */
966 Dprintk("Before Callout %d.\n", cpu);
967 cpu_set(cpu, cpu_callout_map);
968 Dprintk("After Callout %d.\n", cpu);
970 /*
971 * Wait 5s total for a response
972 */
973 for (timeout = 0; timeout < 50000; timeout++) {
974 if (cpu_isset(cpu, cpu_callin_map))
975 break; /* It has booted */
976 udelay(100);
977 }
979 if (cpu_isset(cpu, cpu_callin_map)) {
980 /* number CPUs logically, starting from 1 (BSP is 0) */
981 Dprintk("OK.\n");
982 printk("CPU%d: ", cpu);
983 print_cpu_info(&cpu_data[cpu]);
984 Dprintk("CPU has booted.\n");
985 } else {
986 boot_error= 1;
987 if (*((volatile unsigned char *)trampoline_base)
988 == 0xA5)
989 /* trampoline started but...? */
990 printk("Stuck ??\n");
991 else
992 /* trampoline code not run */
993 printk("Not responding.\n");
994 inquire_remote_apic(apicid);
995 }
996 }
997 x86_cpu_to_apicid[cpu] = apicid;
998 if (boot_error) {
999 /* Try to put things back the way they were before ... */
1000 unmap_cpu_to_logical_apicid(cpu);
1001 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
1002 cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
1003 cpucount--;
1004 }
1006 /* mark "stuck" area as not stuck */
1007 *((volatile unsigned long *)trampoline_base) = 0;
1008 #endif
1010 return boot_error;
1011 }
1013 cycles_t cacheflush_time;
1014 unsigned long cache_decay_ticks;
1016 static void smp_tune_scheduling (void)
1017 {
1018 unsigned long cachesize; /* kB */
1019 unsigned long bandwidth = 350; /* MB/s */
1020 /*
1021 * Rough estimation for SMP scheduling, this is the number of
1022 * cycles it takes for a fully memory-limited process to flush
1023 * the SMP-local cache.
1025 * (For a P5 this pretty much means we will choose another idle
1026 * CPU almost always at wakeup time (this is due to the small
1027 * L1 cache), on PIIs it's around 50-100 usecs, depending on
1028 * the cache size)
1029 */
1031 if (!cpu_khz) {
1032 /*
1033 * this basically disables processor-affinity
1034 * scheduling on SMP without a TSC.
1035 */
1036 cacheflush_time = 0;
1037 return;
1038 } else {
1039 cachesize = boot_cpu_data.x86_cache_size;
1040 if (cachesize == -1) {
1041 cachesize = 16; /* Pentiums, 2x8kB cache */
1042 bandwidth = 100;
1043 }
1045 cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
1046 }
1048 cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
1050 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
1051 (long)cacheflush_time/(cpu_khz/1000),
1052 ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
1053 printk("task migration cache decay timeout: %ld msecs.\n",
1054 cache_decay_ticks);
1055 }
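/*
 * Illustrative numbers (assumed, not measured): with cpu_khz = 500000
 * and a 256 kB cache at the default 350 MB/s, cacheflush_time is
 * (500000>>10) * (256<<10) / 350 ~= 365000 cycles, so the printed
 * per-CPU timeslice cutoff is about 731 usecs and cache_decay_ticks
 * becomes 365000/500000 + 1 = 1 msec.
 */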
1057 /*
1058 * Cycle through the processors sending APIC IPIs to boot each.
1059 */
1061 #if 0
1062 static int boot_cpu_logical_apicid;
1063 #endif
1064 /* Where the IO area was mapped on multiquad, always 0 otherwise */
1065 void *xquad_portio;
1067 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
1069 static void __init smp_boot_cpus(unsigned int max_cpus)
1070 {
1071 int cpu, kicked;
1072 unsigned long bogosum = 0;
1073 #if 0
1074 int apicid, bit;
1075 #endif
1077 /*
1078 * Setup boot CPU information
1079 */
1080 smp_store_cpu_info(0); /* Final full version of the data */
1081 printk("CPU%d: ", 0);
1082 print_cpu_info(&cpu_data[0]);
1084 #if 0
1085 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
1086 boot_cpu_logical_apicid = logical_smp_processor_id();
1087 x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
1088 #else
1089 // boot_cpu_physical_apicid = 0;
1090 // boot_cpu_logical_apicid = 0;
1091 x86_cpu_to_apicid[0] = 0;
1092 #endif
1094 current_thread_info()->cpu = 0;
1095 smp_tune_scheduling();
1096 cpus_clear(cpu_sibling_map[0]);
1097 cpu_set(0, cpu_sibling_map[0]);
1099 /*
1100 * If we couldn't find an SMP configuration at boot time,
1101 * get out of here now!
1102 */
1103 if (!smp_found_config /* && !acpi_lapic) */) {
1104 printk(KERN_NOTICE "SMP motherboard not detected.\n");
1105 smpboot_clear_io_apic_irqs();
1106 #if 0
1107 phys_cpu_present_map = physid_mask_of_physid(0);
1108 if (APIC_init_uniprocessor())
1109 printk(KERN_NOTICE "Local APIC not detected."
1110 " Using dummy APIC emulation.\n");
1111 #endif
1112 map_cpu_to_logical_apicid();
1113 return;
1114 }
1116 #if 0
1117 /*
1118 * Should not be necessary because the MP table should list the boot
1119 * CPU too, but we do it for the sake of robustness anyway.
1120 * Makes no sense to do this check in clustered apic mode, so skip it
1121 */
1122 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
1123 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
1124 boot_cpu_physical_apicid);
1125 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
1126 }
1128 /*
1129 * If we couldn't find a local APIC, then get out of here now!
1130 */
1131 if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
1132 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1133 boot_cpu_physical_apicid);
1134 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
1135 smpboot_clear_io_apic_irqs();
1136 phys_cpu_present_map = physid_mask_of_physid(0);
1137 return;
1138 }
1140 verify_local_APIC();
1141 #endif
1143 /*
1144 * If SMP should be disabled, then really disable it!
1145 */
1146 if (!max_cpus) {
1147 HYPERVISOR_shared_info->n_vcpu = 1;
1148 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
1149 smpboot_clear_io_apic_irqs();
1150 #if 0
1151 phys_cpu_present_map = physid_mask_of_physid(0);
1152 #endif
1153 return;
1154 }
1156 smp_intr_init();
1158 #if 0
1159 connect_bsp_APIC();
1160 setup_local_APIC();
1161 #endif
1162 map_cpu_to_logical_apicid();
1163 #if 0
1166 setup_portio_remap();
1168 /*
1169 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
1171 * In clustered apic mode, phys_cpu_present_map is a constructed thus:
1172 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
1173 * clustered apic ID.
1174 */
1175 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
1176 #endif
1177 Dprintk("CPU present map: %lx\n",
1178 (1UL << HYPERVISOR_shared_info->n_vcpu) - 1);
1180 kicked = 1;
1181 for (cpu = 1; kicked < NR_CPUS &&
1182 cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
1183 if (max_cpus <= cpucount+1)
1184 continue;
1186 if (do_boot_cpu(cpu))
1187 printk("CPU #%d not responding - cannot use it.\n",
1188 cpu);
1189 else
1190 ++kicked;
1191 }
1193 #if 0
1194 /*
1195 * Cleanup possible dangling ends...
1196 */
1197 smpboot_restore_warm_reset_vector();
1198 #endif
1200 /*
1201 * Allow the user to impress friends.
1202 */
1203 Dprintk("Before bogomips.\n");
1204 for (cpu = 0; cpu < NR_CPUS; cpu++)
1205 if (cpu_isset(cpu, cpu_callout_map))
1206 bogosum += cpu_data[cpu].loops_per_jiffy;
1207 printk(KERN_INFO
1208 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
1209 cpucount+1,
1210 bogosum/(500000/HZ),
1211 (bogosum/(5000/HZ))%100);
1213 Dprintk("Before bogocount - setting activated=1.\n");
1215 if (smp_b_stepping)
1216 printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
1218 /*
1219 * Don't taint if we are running SMP kernel on a single non-MP
1220 * approved Athlon
1221 */
1222 if (tainted & TAINT_UNSAFE_SMP) {
1223 if (cpucount)
1224 printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
1225 else
1226 tainted &= ~TAINT_UNSAFE_SMP;
1227 }
1229 Dprintk("Boot done.\n");
1231 /*
1232 * construct cpu_sibling_map[], so that we can tell sibling CPUs
1233 * efficiently.
1234 */
1235 for (cpu = 0; cpu < NR_CPUS; cpu++)
1236 cpus_clear(cpu_sibling_map[cpu]);
1238 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1239 int siblings = 0;
1240 int i;
1241 if (!cpu_isset(cpu, cpu_callout_map))
1242 continue;
1244 if (smp_num_siblings > 1) {
1245 for (i = 0; i < NR_CPUS; i++) {
1246 if (!cpu_isset(i, cpu_callout_map))
1247 continue;
1248 if (phys_proc_id[cpu] == phys_proc_id[i]) {
1249 siblings++;
1250 cpu_set(i, cpu_sibling_map[cpu]);
1251 }
1252 }
1253 } else {
1254 siblings++;
1255 cpu_set(cpu, cpu_sibling_map[cpu]);
1256 }
1258 if (siblings != smp_num_siblings)
1259 printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
1260 }
1262 #if 0
1263 if (nmi_watchdog == NMI_LOCAL_APIC)
1264 check_nmi_watchdog();
1265 #endif
1267 smpboot_setup_io_apic();
1269 #if 0
1270 setup_boot_APIC_clock();
1272 /*
1273 * Synchronize the TSC with the AP
1274 */
1275 if (cpu_has_tsc && cpucount && cpu_khz)
1276 synchronize_tsc_bp();
1277 #endif
1278 }
1280 /* These are wrappers to interface to the new boot process. Someone
1281 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
1282 void __init smp_prepare_cpus(unsigned int max_cpus)
1283 {
1284 smp_boot_cpus(max_cpus);
1285 }
1287 void __devinit smp_prepare_boot_cpu(void)
1288 {
1289 cpu_set(smp_processor_id(), cpu_online_map);
1290 cpu_set(smp_processor_id(), cpu_callout_map);
1291 }
1293 int __devinit __cpu_up(unsigned int cpu)
1294 {
1295 /* This only works at boot for x86. See "rewrite" above. */
1296 if (cpu_isset(cpu, smp_commenced_mask)) {
1297 local_irq_enable();
1298 return -ENOSYS;
1299 }
1301 /* In case one didn't come up */
1302 if (!cpu_isset(cpu, cpu_callin_map)) {
1303 local_irq_enable();
1304 return -EIO;
1305 }
1307 local_irq_enable();
1308 /* Unleash the CPU! */
1309 cpu_set(cpu, smp_commenced_mask);
1310 while (!cpu_isset(cpu, cpu_online_map))
1311 mb();
1312 return 0;
1313 }
1315 void __init smp_cpus_done(unsigned int max_cpus)
1316 {
1317 #if 1
1318 #else
1319 #ifdef CONFIG_X86_IO_APIC
1320 setup_ioapic_dest();
1321 #endif
1322 zap_low_mappings();
1323 /*
1324 * Disable executability of the SMP trampoline:
1325 */
1326 set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
1327 #endif
1328 }
1330 extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
1331 extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
1333 static DEFINE_PER_CPU(int, resched_irq);
1334 static DEFINE_PER_CPU(int, callfunc_irq);
1335 static char resched_name[NR_CPUS][15];
1336 static char callfunc_name[NR_CPUS][15];
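/*
 * Under Xen the reschedule and call-function IPIs arrive as per-CPU
 * event channels rather than APIC vectors: smp_intr_init() binds each
 * to a dynamically allocated irq and attaches the handlers declared
 * above.
 */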
1338 void __init smp_intr_init(void)
1339 {
1340 int cpu = smp_processor_id();
1342 per_cpu(resched_irq, cpu) =
1343 bind_ipi_on_cpu_to_irq(cpu, RESCHEDULE_VECTOR);
1344 sprintf(resched_name[cpu], "resched%d", cpu);
1345 BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
1346 SA_INTERRUPT, resched_name[cpu], NULL));
1348 per_cpu(callfunc_irq, cpu) =
1349 bind_ipi_on_cpu_to_irq(cpu, CALL_FUNCTION_VECTOR);
1350 sprintf(callfunc_name[cpu], "callfunc%d", cpu);
1351 BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
1352 smp_call_function_interrupt,
1353 SA_INTERRUPT, callfunc_name[cpu], NULL));
1354 }