xen/arch/x86/smpboot.c (xen-unstable, ia64 tree) @ 6552:a9873d384da4

changeset: Merge.
author:    adsharma@los-vmm.sc.intel.com
date:      Thu Aug 25 12:24:48 2005 -0700
parents:   112d44270733 fa0754a9f64f
children:  dfaf788ab18c
/*
 * x86 SMP booting functions
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 * Much of the core SMP work is based on previous work by Thomas Radke, to
 * whom a great many thanks are extended.
 *
 * Thanks to Intel for making available several different Pentium,
 * Pentium Pro and Pentium-II/Xeon MP machines.
 * Original development of Linux SMP code supported by Caldera.
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 *
 * Fixes
 *    Felix Koop        : NR_CPUS used properly
 *    Jose Renau        : Handle single CPU case.
 *    Alan Cox          : By repeated request 8) - Total BogoMIPS report.
 *    Greg Wright       : Fix for kernel stacks panic.
 *    Erich Boleyn      : MP v1.4 and additional changes.
 *    Matthias Sattler  : Changes for 2.1 kernel map.
 *    Michel Lespinasse : Changes for 2.1 kernel map.
 *    Michael Chastain  : Change trampoline.S to gnu as.
 *    Alan Cox          : Dumb bug: 'B' step PPro's are fine
 *    Ingo Molnar       : Added APIC timers, based on code
 *                        from Jose Renau
 *    Ingo Molnar       : various cleanups and rewrites
 *    Tigran Aivazian   : fixed "0.00 in /proc/uptime on SMP" bug.
 *    Maciej W. Rozycki : Bits for genuine 82489DX APICs
 *    Martin J. Bligh   : Added support for multi-quad systems
 *    Dave Jones        : Report invalid combinations of Athlon CPUs.
 *    Rusty Russell     : Hacked into shape for new "hotplug" boot process. */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/kernel.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <asm/current.h>
#include <asm/mc146818rtc.h>
#include <asm/desc.h>
#include <asm/div64.h>
#include <asm/flushtlb.h>
#include <asm/msr.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>

static int _foo;
#define set_kernel_exec(x,y) (_foo=0)
#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
int tainted;
#define TAINT_UNSAFE_SMP 0

/* Set if we find a B stepping CPU */
static int __initdata smp_b_stepping;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
EXPORT_SYMBOL(phys_proc_id);
int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
EXPORT_SYMBOL(cpu_core_id);

/* bitmap of online cpus */
cpumask_t cpu_online_map;

cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
static cpumask_t smp_commenced_mask;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

u8 x86_cpu_to_apicid[NR_CPUS] =
            { [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;
static int trampoline_exec;

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __init setup_trampoline(void)
{
    memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
    return virt_to_phys(trampoline_base);
}
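
/*
 * Illustrative note: the physical address returned above is consumed in two
 * places below - shifted right by 4 and masked with 0xf to form the
 * segment:offset warm-reset vector in do_boot_cpu(), and shifted right by
 * 12 to form the STARTUP IPI vector in wakeup_secondary_cpu(). The SIPI
 * vector only carries a page number below 1MB, which is why start_eip must
 * be page-aligned and the trampoline must live in low memory.
 */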

/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
    trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
    /*
     * Has to be in very low memory so we can execute
     * real-mode AP code.
     */
    if (__pa(trampoline_base) >= 0x9F000)
        BUG();
    /*
     * Make the SMP trampoline executable:
     */
    trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}
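
/*
 * Illustrative note: with the alloc_bootmem_low_pages() stub above, the
 * trampoline page is pinned at physical 0x90000, so the 0x9F000 check is a
 * safety net. The bound presumably keeps the page clear of the top of
 * conventional memory, where the BIOS typically places its extended BIOS
 * data area.
 */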

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __init smp_store_cpu_info(int id)
{
    struct cpuinfo_x86 *c = cpu_data + id;

    *c = boot_cpu_data;
    if (id != 0)
        identify_cpu(c);
    /*
     * Mask B, Pentium, but not Pentium MMX
     */
    if (c->x86_vendor == X86_VENDOR_INTEL &&
        c->x86 == 5 &&
        c->x86_mask >= 1 && c->x86_mask <= 4 &&
        c->x86_model <= 3)
        /*
         * Remember we have B step Pentia with bugs
         */
        smp_b_stepping = 1;

    /*
     * Certain Athlons might work (for various values of 'work') in SMP
     * but they are not certified as MP capable.
     */
    if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

        /* Athlon 660/661 is valid. */
        if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
            goto valid_k7;

        /* Duron 670 is valid */
        if ((c->x86_model==7) && (c->x86_mask==0))
            goto valid_k7;

        /*
         * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
         * It's worth noting that the A5 stepping (662) of some Athlon XPs
         * has the MP bit set.
         * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
         */
        if (((c->x86_model==6) && (c->x86_mask>=2)) ||
            ((c->x86_model==7) && (c->x86_mask>=1)) ||
            (c->x86_model > 7))
            if (cpu_has_mp)
                goto valid_k7;

        /* If we get here, it's not a certified SMP capable AMD system. */
        tainted |= TAINT_UNSAFE_SMP;
    }

valid_k7:
    ;
}
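
/*
 * Illustrative note: names like "Athlon 660" above pack CPUID
 * family/model/stepping into three digits - family 6 (c->x86), model 6
 * (c->x86_model), stepping 0 (c->x86_mask) - which is how the comparisons
 * in the AMD block map onto the marketing names in the comments.
 */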

/*
 * TSC synchronization.
 *
 * We first check whether all CPUs have their TSCs synchronized,
 * then we print a warning if not, and always resync.
 */

static atomic_t tsc_start_flag = ATOMIC_INIT(0);
static atomic_t tsc_count_start = ATOMIC_INIT(0);
static atomic_t tsc_count_stop = ATOMIC_INIT(0);
static unsigned long long tsc_values[NR_CPUS];

#define NR_LOOPS 5

static void __init synchronize_tsc_bp (void)
{
    int i;
    unsigned long long t0;
    unsigned long long sum, avg;
    long long delta;
    unsigned long one_usec;
    int buggy = 0;

    printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());

    /* convert from kcyc/sec to cyc/usec */
    one_usec = cpu_khz / 1000;

    atomic_set(&tsc_start_flag, 1);
    wmb();

    /*
     * We loop a few times to get a primed instruction cache,
     * then the last pass is more or less synchronized and
     * the BP and APs set their cycle counters to zero all at
     * once. This reduces the chance of having random offsets
     * between the processors, and guarantees that the maximum
     * delay between the cycle counters is never bigger than
     * the latency of information-passing (cachelines) between
     * two CPUs.
     */
    for (i = 0; i < NR_LOOPS; i++) {
        /*
         * all APs synchronize but they loop on '== num_cpus'
         */
        while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
            mb();
        atomic_set(&tsc_count_stop, 0);
        wmb();
        /*
         * this lets the APs save their current TSC:
         */
        atomic_inc(&tsc_count_start);

        rdtscll(tsc_values[smp_processor_id()]);
        /*
         * We clear the TSC in the last loop:
         */
        if (i == NR_LOOPS-1)
            write_tsc(0, 0);

        /*
         * Wait for all APs to leave the synchronization point:
         */
        while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
            mb();
        atomic_set(&tsc_count_start, 0);
        wmb();
        atomic_inc(&tsc_count_stop);
    }

    sum = 0;
    for (i = 0; i < NR_CPUS; i++) {
        if (cpu_isset(i, cpu_callout_map)) {
            t0 = tsc_values[i];
            sum += t0;
        }
    }
    avg = sum;
    do_div(avg, num_booting_cpus());

    sum = 0;
    for (i = 0; i < NR_CPUS; i++) {
        if (!cpu_isset(i, cpu_callout_map))
            continue;
        delta = tsc_values[i] - avg;
        if (delta < 0)
            delta = -delta;
        /*
         * We report bigger than 2 microseconds clock differences.
         */
        if (delta > 2*one_usec) {
            long realdelta;
            if (!buggy) {
                buggy = 1;
                printk("\n");
            }
            realdelta = delta;
            do_div(realdelta, one_usec);
            if (tsc_values[i] < avg)
                realdelta = -realdelta;

            printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
        }

        sum += delta;
    }
    if (!buggy)
        printk("passed.\n");
}

static void __init synchronize_tsc_ap (void)
{
    int i;

    /*
     * Not every cpu is online at the time
     * this gets called, so we first wait for the BP to
     * finish SMP initialization:
     */
    while (!atomic_read(&tsc_start_flag)) mb();

    for (i = 0; i < NR_LOOPS; i++) {
        atomic_inc(&tsc_count_start);
        while (atomic_read(&tsc_count_start) != num_booting_cpus())
            mb();

        rdtscll(tsc_values[smp_processor_id()]);
        if (i == NR_LOOPS-1)
            write_tsc(0, 0);

        atomic_inc(&tsc_count_stop);
        while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
    }
}
#undef NR_LOOPS
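
/*
 * Illustrative sketch of how the two loops above pair up on each of the
 * NR_LOOPS iterations, with N == num_booting_cpus():
 *
 *   BP (synchronize_tsc_bp)               APs (synchronize_tsc_ap)
 *   wait: tsc_count_start == N-1   <----  atomic_inc(&tsc_count_start)
 *   tsc_count_stop = 0; inc start  ---->  wait: tsc_count_start == N
 *   rdtscll(); zero TSC last pass         rdtscll(); zero TSC last pass
 *   wait: tsc_count_stop == N-1    <----  atomic_inc(&tsc_count_stop)
 *   tsc_count_start = 0; inc stop  ---->  wait: tsc_count_stop == N
 *
 * The final pass thus zeroes every TSC within roughly one cacheline
 * transfer of the others, as the comment in synchronize_tsc_bp() explains.
 */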

extern void calibrate_delay(void);

static atomic_t init_deasserted;

void __init smp_callin(void)
{
    int cpuid, phys_id, i;

    /*
     * If woken up by an INIT in an 82489DX configuration
     * we may get here before an INIT-deassert IPI reaches
     * our local APIC. We have to wait for the IPI or we'll
     * lock up on an APIC access.
     */
    wait_for_init_deassert(&init_deasserted);

    /*
     * (This works even if the APIC is not enabled.)
     */
    phys_id = GET_APIC_ID(apic_read(APIC_ID));
    cpuid = smp_processor_id();
    if (cpu_isset(cpuid, cpu_callin_map)) {
        printk("huh, phys CPU#%d, CPU#%d already present??\n",
               phys_id, cpuid);
        BUG();
    }
    Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

    /*
     * STARTUP IPIs are fragile beasts as they might sometimes
     * trigger some glue motherboard logic. Complete APIC bus
     * silence for 1 second, this overestimates the time the
     * boot CPU is spending to send the up to 2 STARTUP IPIs
     * by a factor of two. This should be enough.
     */

    /*
     * Waiting 2s total for startup
     */
    for (i = 0; i < 200; i++) {
        /*
         * Has the boot CPU finished its STARTUP sequence?
         */
        if (cpu_isset(cpuid, cpu_callout_map))
            break;
        rep_nop();
        mdelay(10);
    }

    if (!cpu_isset(cpuid, cpu_callout_map)) {
        printk("BUG: CPU%d started up but did not get a callout!\n",
               cpuid);
        BUG();
    }

    /*
     * the boot CPU has finished the init stage and is spinning
     * on callin_map until we finish. We are free to set up this
     * CPU, first the APIC. (this is probably redundant on most
     * boards)
     */

    Dprintk("CALLIN, before setup_local_APIC().\n");
    smp_callin_clear_local_apic();
    setup_local_APIC();
    map_cpu_to_logical_apicid();

#if 0
    /*
     * Get our bogomips.
     */
    calibrate_delay();
    Dprintk("Stack at about %p\n", &cpuid);
#endif

    /*
     * Save our processor parameters
     */
    smp_store_cpu_info(cpuid);

    disable_APIC_timer();

    /*
     * Allow the master to continue.
     */
    cpu_set(cpuid, cpu_callin_map);

    /*
     * Synchronize the TSC with the BP
     */
    if (cpu_has_tsc && cpu_khz)
        synchronize_tsc_ap();
    calibrate_tsc_ap();
}
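
/*
 * Illustrative sketch of the callout/callin handshake between do_boot_cpu()
 * on the BP (below) and smp_callin() on a freshly woken AP (above):
 *
 *   BP: wakeup_secondary_cpu()      AP: enters start_secondary()/smp_callin()
 *   BP: cpu_set(cpu_callout_map)    AP: waits up to 2s for its callout bit
 *   BP: polls cpu_callin_map (5s)   AP: APIC setup, smp_store_cpu_info()
 *                                   AP: cpu_set(cpuid, cpu_callin_map)
 *   BP: boots the next CPU          AP: spins on smp_commenced_mask
 */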

int cpucount;

#ifdef CONFIG_X86_32
static void construct_percpu_idt(unsigned int cpu)
{
    unsigned char idt_load[10];

    idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
    memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));

    *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
    *(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
    __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
}
#endif
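
/*
 * Illustrative note: idt_load[] above is laid out as the pseudo-descriptor
 * that lidt expects on x86-32 - a 16-bit limit (table size in bytes minus
 * one) at offset 0, followed by a 32-bit linear base address at offset 2;
 * the remaining bytes of the 10-byte array are unused padding.
 */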

/*
 * Activate a secondary processor.
 */
void __init start_secondary(void *unused)
{
    unsigned int cpu = cpucount;

    extern void percpu_traps_init(void);

    set_current(idle_task[cpu]);
    set_processor_id(cpu);

    percpu_traps_init();

    cpu_init();
    smp_callin();
    while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
        rep_nop();

#ifdef CONFIG_X86_32
    /*
     * At this point, boot CPU has fully initialised the IDT. It is
     * now safe to make ourselves a private copy.
     */
    construct_percpu_idt(cpu);
#endif

    setup_secondary_APIC_clock();
    enable_APIC_timer();

    /*
     * low-memory mappings have been cleared, flush them from
     * the local TLBs too.
     */
    local_flush_tlb();
    cpu_set(smp_processor_id(), cpu_online_map);

    /* We can take interrupts now: we're officially "up". */
    local_irq_enable();

    init_percpu_time();

    wmb();
    startup_cpu_idle_loop();
}
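
/*
 * Illustrative note: the rep_nop() spin on smp_commenced_mask above is the
 * AP-side half of __cpu_up() near the end of this file, which sets the bit
 * to "unleash" the CPU; only then does the AP mark itself online and enter
 * its idle loop.
 */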

extern struct {
    void * esp;
    unsigned short ss;
} stack_start;

#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
            { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
    printk("Mapping cpu %d to node %d\n", cpu, node);
    cpu_set(cpu, node_2_cpu_mask[node]);
    cpu_2_node[cpu] = node;
}

/* undo a mapping between cpu and node. */
static inline void unmap_cpu_to_node(int cpu)
{
    int node;

    printk("Unmapping cpu %d from all nodes\n", cpu);
    for (node = 0; node < MAX_NUMNODES; node++)
        cpu_clear(cpu, node_2_cpu_mask[node]);
    cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

#define map_cpu_to_node(cpu, node) ({})
#define unmap_cpu_to_node(cpu) ({})

#endif /* CONFIG_NUMA */

u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

void map_cpu_to_logical_apicid(void)
{
    int cpu = smp_processor_id();
    int apicid = logical_smp_processor_id();

    cpu_2_logical_apicid[cpu] = apicid;
    map_cpu_to_node(cpu, apicid_to_node(apicid));
}

void unmap_cpu_to_logical_apicid(int cpu)
{
    cpu_2_logical_apicid[cpu] = BAD_APICID;
    unmap_cpu_to_node(cpu);
}

#if APIC_DEBUG
static inline void __inquire_remote_apic(int apicid)
{
    int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
    char *names[] = { "ID", "VERSION", "SPIV" };
    int timeout, status;

    printk("Inquiring remote APIC #%d...\n", apicid);

    for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
        printk("... APIC #%d %s: ", apicid, names[i]);

        /*
         * Wait for idle.
         */
        apic_wait_icr_idle();

        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
        apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

        timeout = 0;
        do {
            udelay(100);
            status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
        } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

        switch (status) {
        case APIC_ICR_RR_VALID:
            status = apic_read(APIC_RRR);
            printk("%08x\n", status);
            break;
        default:
            printk("failed\n");
        }
    }
}
#endif
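
/*
 * Illustrative note: the debug helper above uses the local APIC's
 * remote-read cycle - program the destination in ICR2, issue APIC_DM_REMRD
 * naming one of the target's registers, poll the ICR remote-read status
 * until APIC_ICR_RR_VALID, then fetch the value from the remote-read
 * register (APIC_RRR).
 */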

#ifdef WAKE_SECONDARY_VIA_NMI
/*
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the
 * normal INIT, INIT, STARTUP sequence will reset the chip hard for us, and
 * this won't ... remember to clear down the APIC, etc later.
 */
static int __init
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
    unsigned long send_status = 0, accept_status = 0;
    int timeout, maxlvt;

    /* Target chip */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

    /* Boot on the stack */
    /* Kick the second */
    apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

    Dprintk("Waiting for send to finish...\n");
    timeout = 0;
    do {
        Dprintk("+");
        udelay(100);
        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
    } while (send_status && (timeout++ < 1000));

    /*
     * Give the other CPU some time to accept the IPI.
     */
    udelay(200);
    /*
     * Due to the Pentium erratum 3AP.
     */
    maxlvt = get_maxlvt();
    if (maxlvt > 3) {
        apic_read_around(APIC_SPIV);
        apic_write(APIC_ESR, 0);
    }
    accept_status = (apic_read(APIC_ESR) & 0xEF);
    Dprintk("NMI sent.\n");

    if (send_status)
        printk("APIC never delivered???\n");
    if (accept_status)
        printk("APIC delivery error (%lx).\n", accept_status);

    return (send_status | accept_status);
}
#endif /* WAKE_SECONDARY_VIA_NMI */

#ifdef WAKE_SECONDARY_VIA_INIT
static int __init
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
    unsigned long send_status = 0, accept_status = 0;
    int maxlvt, timeout, num_starts, j;

    /*
     * Be paranoid about clearing APIC errors.
     */
    if (APIC_INTEGRATED(apic_version[phys_apicid])) {
        apic_read_around(APIC_SPIV);
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
    }

    Dprintk("Asserting INIT.\n");

    /*
     * Turn INIT on target chip
     */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

    /*
     * Send IPI
     */
    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
                      | APIC_DM_INIT);

    Dprintk("Waiting for send to finish...\n");
    timeout = 0;
    do {
        Dprintk("+");
        udelay(100);
        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
    } while (send_status && (timeout++ < 1000));

    mdelay(10);

    Dprintk("Deasserting INIT.\n");

    /* Target chip */
    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

    /* Send IPI */
    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

    Dprintk("Waiting for send to finish...\n");
    timeout = 0;
    do {
        Dprintk("+");
        udelay(100);
        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
    } while (send_status && (timeout++ < 1000));

    atomic_set(&init_deasserted, 1);

    /*
     * Should we send STARTUP IPIs ?
     *
     * Determine this based on the APIC version.
     * If we don't have an integrated APIC, don't send the STARTUP IPIs.
     */
    if (APIC_INTEGRATED(apic_version[phys_apicid]))
        num_starts = 2;
    else
        num_starts = 0;

    /*
     * Run STARTUP IPI loop.
     */
    Dprintk("#startup loops: %d.\n", num_starts);

    maxlvt = get_maxlvt();

    for (j = 1; j <= num_starts; j++) {
        Dprintk("Sending STARTUP #%d.\n", j);
        apic_read_around(APIC_SPIV);
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
        Dprintk("After apic_write.\n");

        /*
         * STARTUP IPI
         */

        /* Target chip */
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /* Boot on the stack */
        /* Kick the second */
        apic_write_around(APIC_ICR, APIC_DM_STARTUP
                          | (start_eip >> 12));

        /*
         * Give the other CPU some time to accept the IPI.
         */
        udelay(300);

        Dprintk("Startup point 1.\n");

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
            Dprintk("+");
            udelay(100);
            send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while (send_status && (timeout++ < 1000));

        /*
         * Give the other CPU some time to accept the IPI.
         */
        udelay(200);
        /*
         * Due to the Pentium erratum 3AP.
         */
        if (maxlvt > 3) {
            apic_read_around(APIC_SPIV);
            apic_write(APIC_ESR, 0);
        }
        accept_status = (apic_read(APIC_ESR) & 0xEF);
        if (send_status || accept_status)
            break;
    }
    Dprintk("After Startup.\n");

    if (send_status)
        printk("APIC never delivered???\n");
    if (accept_status)
        printk("APIC delivery error (%lx).\n", accept_status);

    return (send_status | accept_status);
}
#endif /* WAKE_SECONDARY_VIA_INIT */
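
/*
 * Illustrative summary of the function above: it is the MP-spec
 * INIT/INIT/STARTUP sequence - assert a level-triggered INIT, deassert it,
 * then send up to two STARTUP IPIs whose 8-bit vector field carries the
 * page number of the real-mode entry point (start_eip >> 12). Pre-
 * integrated (82489DX-class) APICs get no STARTUP IPIs at all
 * (num_starts == 0) and rely on the INIT-deassert path instead.
 */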

extern cpumask_t cpu_initialized;

static int __init do_boot_cpu(int apicid)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 */
{
    struct domain *idle;
    struct vcpu *v;
    void *stack;
    unsigned long boot_error;
    int timeout, cpu;
    unsigned long start_eip;
    unsigned short nmi_high = 0, nmi_low = 0;

    cpu = ++cpucount;

    if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
        panic("failed 'createdomain' for CPU %d", cpu);

    v = idle_task[cpu] = idle->vcpu[0];

    set_bit(_DOMF_idle_domain, &idle->domain_flags);

    v->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));

    /* start_eip had better be page-aligned! */
    start_eip = setup_trampoline();

    /* So we see what's up */
    printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);

    stack = alloc_xenheap_pages(STACK_ORDER);
#if defined(__i386__)
    stack_start.esp = (void *)__pa(stack);
#elif defined(__x86_64__)
    stack_start.esp = stack;
#endif
    stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);

    /* Debug build: detect stack overflow by setting up a guard page. */
    memguard_guard_stack(stack);

    /*
     * This grunge runs the startup process for
     * the targeted processor.
     */

    atomic_set(&init_deasserted, 0);

    Dprintk("Setting warm reset code and vector.\n");

    store_NMI_vector(&nmi_high, &nmi_low);

    CMOS_WRITE(0xa, 0xf);
    local_flush_tlb();
    Dprintk("1.\n");
    *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
    Dprintk("2.\n");
    *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
    Dprintk("3.\n");
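
    /*
     * Illustrative note: writing 0xa to CMOS register 0xf selects the BIOS
     * "warm reset" shutdown path, which presumably jumps through the
     * segment:offset vector at physical 0x467 - the two writes above split
     * start_eip into that vector's segment (start_eip >> 4) and offset
     * (start_eip & 0xf), so a CPU resuming through the BIOS lands in the
     * trampoline. The vector is restored to defaults after booting, below.
     */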

    /*
     * Starting actual IPI sequence...
     */
    boot_error = wakeup_secondary_cpu(apicid, start_eip);

    if (!boot_error) {
        /*
         * allow APs to start initializing.
         */
        Dprintk("Before Callout %d.\n", cpu);
        cpu_set(cpu, cpu_callout_map);
        Dprintk("After Callout %d.\n", cpu);

        /*
         * Wait 5s total for a response
         */
        for (timeout = 0; timeout < 50000; timeout++) {
            if (cpu_isset(cpu, cpu_callin_map))
                break;  /* It has booted */
            udelay(100);
        }

        if (cpu_isset(cpu, cpu_callin_map)) {
            /* number CPUs logically, starting from 1 (BSP is 0) */
            Dprintk("OK.\n");
            printk("CPU%d: ", cpu);
            print_cpu_info(&cpu_data[cpu]);
            Dprintk("CPU has booted.\n");
        } else {
            boot_error = 1;
            if (*((volatile unsigned char *)trampoline_base)
                == 0xA5)
                /* trampoline started but...? */
                printk("Stuck ??\n");
            else
                /* trampoline code not run */
                printk("Not responding.\n");
            inquire_remote_apic(apicid);
        }
    }
    x86_cpu_to_apicid[cpu] = apicid;
    if (boot_error) {
        /* Try to put things back the way they were before ... */
        unmap_cpu_to_logical_apicid(cpu);
        cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
        cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
        cpucount--;
    }

    /* mark "stuck" area as not stuck */
    *((volatile unsigned long *)trampoline_base) = 0;

    return boot_error;
}
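
/*
 * Illustrative note: the 0xA5 test in do_boot_cpu() assumes the trampoline
 * code (in trampoline.S, not shown here) stamps a marker byte at
 * trampoline_base once it starts executing; "Stuck ??" therefore means the
 * AP ran the trampoline but never reached smp_callin(), while "Not
 * responding." means it never started at all.
 */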

#if 0
cycles_t cacheflush_time;
unsigned long cache_decay_ticks;

static void smp_tune_scheduling (void)
{
    unsigned long cachesize;       /* kB   */
    unsigned long bandwidth = 350; /* MB/s */
    /*
     * Rough estimation for SMP scheduling, this is the number of
     * cycles it takes for a fully memory-limited process to flush
     * the SMP-local cache.
     *
     * (For a P5 this pretty much means we will choose another idle
     * CPU almost always at wakeup time (this is due to the small
     * L1 cache), on PIIs it's around 50-100 usecs, depending on
     * the cache size)
     */

    if (!cpu_khz) {
        /*
         * this basically disables processor-affinity
         * scheduling on SMP without a TSC.
         */
        cacheflush_time = 0;
        return;
    } else {
        cachesize = boot_cpu_data.x86_cache_size;
        if (cachesize == -1) {
            cachesize = 16; /* Pentiums, 2x8kB cache */
            bandwidth = 100;
        }

        cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
    }

    cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;

    printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
           (long)cacheflush_time/(cpu_khz/1000),
           ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
    printk("task migration cache decay timeout: %ld msecs.\n",
           cache_decay_ticks);
}
#else
#define smp_tune_scheduling() ((void)0)
#endif
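
/*
 * Worked example for the disabled estimate above, assuming a 500MHz CPU
 * (cpu_khz = 500000) with a 256kB cache and the default 350MB/s bandwidth:
 *
 *   cacheflush_time   = (500000 >> 10) * (256 << 10) / 350
 *                     = 488 * 262144 / 350 ~= 365503 cycles (~0.73ms)
 *   cache_decay_ticks = 365503 / 500000 + 1 = 1
 */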

/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);

static void __init smp_boot_cpus(unsigned int max_cpus)
{
    int apicid, cpu, bit, kicked;
#ifdef BOGOMIPS
    unsigned long bogosum = 0;
#endif

    /*
     * Setup boot CPU information
     */
    smp_store_cpu_info(0); /* Final full version of the data */
    printk("CPU%d: ", 0);
    print_cpu_info(&cpu_data[0]);

    boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
    boot_cpu_logical_apicid = logical_smp_processor_id();
    x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

    /*current_thread_info()->cpu = 0;*/
    smp_tune_scheduling();
    cpus_clear(cpu_sibling_map[0]);
    cpu_set(0, cpu_sibling_map[0]);

    cpus_clear(cpu_core_map[0]);
    cpu_set(0, cpu_core_map[0]);

    /*
     * If we couldn't find an SMP configuration at boot time,
     * get out of here now!
     */
    if (!smp_found_config && !acpi_lapic) {
        printk(KERN_NOTICE "SMP motherboard not detected.\n");
init_uniprocessor:
        phys_cpu_present_map = physid_mask_of_physid(0);
        if (APIC_init_uniprocessor())
            printk(KERN_NOTICE "Local APIC not detected."
                   " Using dummy APIC emulation.\n");
        map_cpu_to_logical_apicid();
        cpu_set(0, cpu_sibling_map[0]);
        cpu_set(0, cpu_core_map[0]);
        return;
    }

    /*
     * Should not be necessary because the MP table should list the boot
     * CPU too, but we do it for the sake of robustness anyway.
     * Makes no sense to do this check in clustered apic mode, so skip it
     */
    if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
        printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
               boot_cpu_physical_apicid);
        physid_set(hard_smp_processor_id(), phys_cpu_present_map);
    }

    /*
     * If we couldn't find a local APIC, then get out of here now!
     */
    if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
        printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        goto init_uniprocessor;
    }

    verify_local_APIC();

    /*
     * If SMP should be disabled, then really disable it!
     */
    if (!max_cpus)
        goto init_uniprocessor;

    connect_bsp_APIC();
    setup_local_APIC();
    map_cpu_to_logical_apicid();

    setup_portio_remap();

    /*
     * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
     *
     * In clustered apic mode, phys_cpu_present_map is constructed thus:
     * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
     * clustered apic ID.
     */
    Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));

    kicked = 1;
    for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
        apicid = cpu_present_to_apicid(bit);
        /*
         * Don't even attempt to start the boot CPU!
         */
        if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
            continue;

        if (!check_apicid_present(bit))
            continue;
        if (max_cpus <= cpucount+1)
            continue;

        if (do_boot_cpu(apicid))
            printk("CPU #%d not responding - cannot use it.\n",
                   apicid);
        else
            ++kicked;
    }

    /*
     * Install writable page 0 entry to set BIOS data area.
     */
    local_flush_tlb();

    /*
     * Paranoid: Set warm reset code and vector here back
     * to default values.
     */
    CMOS_WRITE(0, 0xf);

    *((volatile long *) phys_to_virt(0x467)) = 0;

#ifdef BOGOMIPS
    /*
     * Allow the user to impress friends.
     */
    Dprintk("Before bogomips.\n");
    for (cpu = 0; cpu < NR_CPUS; cpu++)
        if (cpu_isset(cpu, cpu_callout_map))
            bogosum += cpu_data[cpu].loops_per_jiffy;
    printk(KERN_INFO
           "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
           cpucount+1,
           bogosum/(500000/HZ),
           (bogosum/(5000/HZ))%100);
#else
    printk("Total of %d processors activated.\n", cpucount+1);
#endif

    Dprintk("Before bogocount - setting activated=1.\n");

    if (smp_b_stepping)
        printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

    /*
     * Don't taint if we are running SMP kernel on a single non-MP
     * approved Athlon
     */
    if (tainted & TAINT_UNSAFE_SMP) {
        if (cpucount)
            printk(KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
        else
            tainted &= ~TAINT_UNSAFE_SMP;
    }

    Dprintk("Boot done.\n");

    /*
     * construct cpu_sibling_map[], so that we can tell sibling CPUs
     * efficiently.
     */
    for (cpu = 0; cpu < NR_CPUS; cpu++) {
        cpus_clear(cpu_sibling_map[cpu]);
        cpus_clear(cpu_core_map[cpu]);
    }

    for (cpu = 0; cpu < NR_CPUS; cpu++) {
        struct cpuinfo_x86 *c = cpu_data + cpu;
        int siblings = 0;
        int i;
        if (!cpu_isset(cpu, cpu_callout_map))
            continue;

        if (smp_num_siblings > 1) {
            for (i = 0; i < NR_CPUS; i++) {
                if (!cpu_isset(i, cpu_callout_map))
                    continue;
                if (cpu_core_id[cpu] == cpu_core_id[i]) {
                    siblings++;
                    cpu_set(i, cpu_sibling_map[cpu]);
                }
            }
        } else {
            siblings++;
            cpu_set(cpu, cpu_sibling_map[cpu]);
        }

        if (siblings != smp_num_siblings) {
            printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
            smp_num_siblings = siblings;
        }

        if (c->x86_num_cores > 1) {
            for (i = 0; i < NR_CPUS; i++) {
                if (!cpu_isset(i, cpu_callout_map))
                    continue;
                if (phys_proc_id[cpu] == phys_proc_id[i]) {
                    cpu_set(i, cpu_core_map[cpu]);
                }
            }
        } else {
            cpu_core_map[cpu] = cpu_sibling_map[cpu];
        }
    }

    if (nmi_watchdog == NMI_LOCAL_APIC)
        check_nmi_watchdog();

    /*
     * Here we can be sure that there is an IO-APIC in the system. Let's
     * go and set it up:
     */
    if (!skip_ioapic_setup && nr_ioapics)
        setup_IO_APIC();

    setup_boot_APIC_clock();

    /*
     * Synchronize the TSC with the AP
     */
    if (cpu_has_tsc && cpucount && cpu_khz)
        synchronize_tsc_bp();
    calibrate_tsc_bp();
}
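
/*
 * Illustrative example for the sibling/core construction above: on a
 * two-package system where each package has two hyperthreaded cores, each
 * logical CPU ends up with cpu_sibling_map holding the CPUs sharing its
 * core (equal cpu_core_id) and cpu_core_map holding every CPU in its
 * package (equal phys_proc_id); with one thread per core the else branches
 * collapse both maps to the trivial cases.
 */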

/* These are wrappers to interface to the new boot process. Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
    smp_boot_cpus(max_cpus);
}

void __devinit smp_prepare_boot_cpu(void)
{
    cpu_set(smp_processor_id(), cpu_online_map);
    cpu_set(smp_processor_id(), cpu_callout_map);
}

int __devinit __cpu_up(unsigned int cpu)
{
    /* This only works at boot for x86. See "rewrite" above. */
    if (cpu_isset(cpu, smp_commenced_mask))
        return -ENOSYS;

    /* In case one didn't come up */
    if (!cpu_isset(cpu, cpu_callin_map))
        return -EIO;

    /* Unleash the CPU! */
    cpu_set(cpu, smp_commenced_mask);
    while (!cpu_isset(cpu, cpu_online_map)) {
        mb();
        if (softirq_pending(0))
            do_softirq();
    }
    return 0;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
    setup_ioapic_dest();
#endif
#ifdef CONFIG_X86_64
    zap_low_mappings();
#endif
    /*
     * Disable executability of the SMP trampoline:
     */
    set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
}

#if 0
void __init smp_intr_init(void)
{
    /*
     * The reschedule interrupt is a CPU-to-CPU reschedule-helper
     * IPI, driven by wakeup.
     */
    set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);

    /* IPI for invalidation */
    set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);

    /* IPI for generic function call */
    set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}
#endif