ia64/xen-unstable: linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c @ changeset 7351:def91f2dbc89

Fix vcpu-hotplug xenbus watch handler and setup.
Signed-off-by: Keir Fraser <keir@xensource.com>

author		kaf24@firebug.cl.cam.ac.uk
date		Wed Oct 12 11:47:16 2005 +0100
parents		74d56b7ff46c
children	5a97ee0633e8

/*
 *	x86 SMP booting functions
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 *	Much of the core SMP work is based on previous work by Thomas Radke, to
 *	whom a great many thanks are extended.
 *
 *	Thanks to Intel for making available several different Pentium,
 *	Pentium Pro and Pentium-II/Xeon MP machines.
 *	Original development of Linux SMP code supported by Caldera.
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
 *		Jose Renau	:	Handle single CPU case.
 *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
 *		Greg Wright	:	Fix for kernel stacks panic.
 *		Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler	:	Changes for 2.1 kernel map.
 *	Michel Lespinasse	:	Changes for 2.1 kernel map.
 *	Michael Chastain	:	Change trampoline.S to gnu as.
 *		Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *		Ingo Molnar	:	Added APIC timers, based on code
 *					from Jose Renau
 *		Ingo Molnar	:	various cleanups and rewrites
 *	Tigran Aivazian		:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *		Martin J. Bligh	:	Added support for multi-quad systems
 *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
 *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>

#include <asm/smp_alt.h>

#ifndef CONFIG_X86_IO_APIC
#define Dprintk(args...)
#endif
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>

#include <asm-xen/evtchn.h>
#include <asm-xen/xen-public/vcpu.h>
#include <asm-xen/xenbus.h>

/* Set if we find a B stepping CPU */
static int __initdata smp_b_stepping;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
EXPORT_SYMBOL(phys_proc_id);
int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
EXPORT_SYMBOL(cpu_core_id);

/* bitmap of online cpus */
cpumask_t cpu_online_map;

cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
static cpumask_t smp_commenced_mask;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

u8 x86_cpu_to_apicid[NR_CPUS] =
			{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);

#if 0
/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;
static int trampoline_exec;
#endif

#ifdef CONFIG_HOTPLUG_CPU
/* State of each CPU. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];

#if 0
/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __init setup_trampoline(void)
{
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}
#endif

static void map_cpu_to_logical_apicid(void);

/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
#if 0
	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();
	/*
	 * Make the SMP trampoline executable:
	 */
	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
#endif
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

static void __init smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	if (id != 0)
		identify_cpu(c);
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		/*
		 * Remember we have B step Pentia with bugs
		 */
		smp_b_stepping = 1;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

		/* Athlon 660/661 is valid. */
		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
			goto valid_k7;

		/* Duron 670 is valid */
		if ((c->x86_model==7) && (c->x86_mask==0))
			goto valid_k7;

		/*
		 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
		 * It's worth noting that the A5 stepping (662) of some Athlon XP's
		 * have the MP bit set.
		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
		 */
		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
		     (c->x86_model> 7))
			if (cpu_has_mp)
				goto valid_k7;

		/* If we get here, it's not a certified SMP capable AMD system. */
		tainted |= TAINT_UNSAFE_SMP;
	}

valid_k7:
	;
}

#if 0
/*
 * TSC synchronization.
 *
 * We first check whether all CPUs have their TSC's synchronized,
 * then we print a warning if not, and always resync.
 */

static atomic_t tsc_start_flag = ATOMIC_INIT(0);
static atomic_t tsc_count_start = ATOMIC_INIT(0);
static atomic_t tsc_count_stop = ATOMIC_INIT(0);
static unsigned long long tsc_values[NR_CPUS];

#define NR_LOOPS 5

static void __init synchronize_tsc_bp (void)
{
	int i;
	unsigned long long t0;
	unsigned long long sum, avg;
	long long delta;
	unsigned long one_usec;
	int buggy = 0;

	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());

	/* convert from kcyc/sec to cyc/usec */
	one_usec = cpu_khz / 1000;

	atomic_set(&tsc_start_flag, 1);
	wmb();

	/*
	 * We loop a few times to get a primed instruction cache,
	 * then the last pass is more or less synchronized and
	 * the BP and APs set their cycle counters to zero all at
	 * once. This reduces the chance of having random offsets
	 * between the processors, and guarantees that the maximum
	 * delay between the cycle counters is never bigger than
	 * the latency of information-passing (cachelines) between
	 * two CPUs.
	 */
	for (i = 0; i < NR_LOOPS; i++) {
		/*
		 * all APs synchronize but they loop on '== num_cpus'
		 */
		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
			mb();
		atomic_set(&tsc_count_stop, 0);
		wmb();
		/*
		 * this lets the APs save their current TSC:
		 */
		atomic_inc(&tsc_count_start);

		rdtscll(tsc_values[smp_processor_id()]);
		/*
		 * We clear the TSC in the last loop:
		 */
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		/*
		 * Wait for all APs to leave the synchronization point:
		 */
		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
			mb();
		atomic_set(&tsc_count_start, 0);
		wmb();
		atomic_inc(&tsc_count_stop);
	}

	sum = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_isset(i, cpu_callout_map)) {
			t0 = tsc_values[i];
			sum += t0;
		}
	}
	avg = sum;
	do_div(avg, num_booting_cpus());

	sum = 0;
	for (i = 0; i < NR_CPUS; i++) {
		if (!cpu_isset(i, cpu_callout_map))
			continue;
		delta = tsc_values[i] - avg;
		if (delta < 0)
			delta = -delta;
		/*
		 * We report bigger than 2 microseconds clock differences.
		 */
		if (delta > 2*one_usec) {
			long realdelta;
			if (!buggy) {
				buggy = 1;
				printk("\n");
			}
			realdelta = delta;
			do_div(realdelta, one_usec);
			if (tsc_values[i] < avg)
				realdelta = -realdelta;

			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
		}

		sum += delta;
	}
	if (!buggy)
		printk("passed.\n");
}

static void __init synchronize_tsc_ap (void)
{
	int i;

	/*
	 * Not every cpu is online at the time
	 * this gets called, so we first wait for the BP to
	 * finish SMP initialization:
	 */
	while (!atomic_read(&tsc_start_flag)) mb();

	for (i = 0; i < NR_LOOPS; i++) {
		atomic_inc(&tsc_count_start);
		while (atomic_read(&tsc_count_start) != num_booting_cpus())
			mb();

		rdtscll(tsc_values[smp_processor_id()]);
		if (i == NR_LOOPS-1)
			write_tsc(0, 0);

		atomic_inc(&tsc_count_stop);
		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
	}
}
#undef NR_LOOPS
#endif

extern void calibrate_delay(void);

static atomic_t init_deasserted;

static void __init smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

#if 0
	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC. We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	wait_for_init_deassert(&init_deasserted);
#endif

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = smp_processor_id();
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		printk("huh, phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
		BUG();
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		rep_nop();
	}

	if (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		BUG();
	}

#if 0
	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	smp_callin_clear_local_apic();
	setup_local_APIC();
#endif
	map_cpu_to_logical_apicid();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n", &cpuid);

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

#if 0
	disable_APIC_timer();
#endif

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);

#if 0
	/*
	 * Synchronize the TSC with the BP
	 */
	if (cpu_has_tsc && cpu_khz)
		synchronize_tsc_ap();
#endif
}

static int cpucount;

extern void local_setup_timer(void);

/*
 * Activate a secondary processor.
 */
static void __init start_secondary(void *unused)
{
	/*
	 * Don't put anything before smp_callin(): SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
	cpu_init();
	smp_callin();
	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
		rep_nop();
	local_setup_timer();
	smp_intr_init();
	local_irq_enable();
	/*
	 * low-memory mappings have been cleared, flush them from
	 * the local TLBs too.
	 */
	local_flush_tlb();
	cpu_set(smp_processor_id(), cpu_online_map);

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();
	cpu_idle();
}

/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure
 * This function must not return.
 */
void __init initialize_secondary(void)
{
	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */

	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
		:"r" (current->thread.esp),"r" (current->thread.eip));
}

extern struct {
	void * esp;
	unsigned short ss;
} stack_start;

#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
	printk("Mapping cpu %d to node %d\n", cpu, node);
	cpu_set(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = node;
}

/* undo a mapping between cpu and node. */
static inline void unmap_cpu_to_node(int cpu)
{
	int node;

	printk("Unmapping cpu %d from all nodes\n", cpu);
	for (node = 0; node < MAX_NUMNODES; node ++)
		cpu_clear(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

#define map_cpu_to_node(cpu, node)	({})
#define unmap_cpu_to_node(cpu)		({})

#endif /* CONFIG_NUMA */

u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

static void map_cpu_to_logical_apicid(void)
{
	int cpu = smp_processor_id();
	int apicid = smp_processor_id();

	cpu_2_logical_apicid[cpu] = apicid;
	map_cpu_to_node(cpu, apicid_to_node(apicid));
}

static void unmap_cpu_to_logical_apicid(int cpu)
{
	cpu_2_logical_apicid[cpu] = BAD_APICID;
	unmap_cpu_to_node(cpu);
}

#if APIC_DEBUG
static inline void __inquire_remote_apic(int apicid)
{
	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout, status;

	printk("Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		apic_wait_icr_idle();

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%08x\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
#endif

#if 0
#ifdef WAKE_SECONDARY_VIA_NMI
/*
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 */
static int __init
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int timeout, maxlvt;

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	/*
	 * Due to the Pentium erratum 3AP.
	 */
	maxlvt = get_maxlvt();
	if (maxlvt > 3) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_NMI */

#ifdef WAKE_SECONDARY_VIA_INIT
static int __init
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status = 0, accept_status = 0;
	int maxlvt, timeout, num_starts, j;

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	timeout = 0;
	do {
		Dprintk("+");
		udelay(100);
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
	} while (send_status && (timeout++ < 1000));

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n", j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		timeout = 0;
		do {
			Dprintk("+");
			udelay(100);
			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		} while (send_status && (timeout++ < 1000));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_INIT */
#endif

extern cpumask_t cpu_initialized;
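
/*
 * Under Xen there is no INIT/INIT/STARTUP IPI dance: do_boot_cpu() below
 * builds a complete vcpu_guest_context_t (segment registers, entry point,
 * GDT frames, ring-1 stack, event/failsafe callbacks, cr3) and hands it
 * to the hypervisor with VCPUOP_initialise, then kicks the new vcpu into
 * life with VCPUOP_up.
 */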
static int __init do_boot_cpu(int apicid)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 */
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout, cpu;
	unsigned long start_eip;
#if 0
	unsigned short nmi_high = 0, nmi_low = 0;
#endif
	vcpu_guest_context_t ctxt;
	extern void startup_32_smp(void);
	extern void hypervisor_callback(void);
	extern void failsafe_callback(void);
	extern void smp_trap_init(trap_info_t *);

	cpu = ++cpucount;
	/*
	 * We can't use kernel_thread since we must avoid to
	 * reschedule the child.
	 */
	idle = fork_idle(cpu);
	if (IS_ERR(idle))
		panic("failed fork for CPU %d", cpu);
	idle->thread.eip = (unsigned long) start_secondary;
	/* start_eip had better be page-aligned! */
	start_eip = (unsigned long)startup_32_smp;

	/* So we see what's up   */
	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
	/* Stack for startup_32 can be just as for start_secondary onwards */
	stack_start.esp = (void *) idle->thread.esp;

	irq_ctx_init(cpu);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

#if 1
	cpu_gdt_descr[cpu].address = __get_free_page(GFP_KERNEL|__GFP_ZERO);
	BUG_ON(cpu_gdt_descr[0].size > PAGE_SIZE);
	cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
	memcpy((void *)cpu_gdt_descr[cpu].address,
	       (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);

	memset(&ctxt, 0, sizeof(ctxt));

	ctxt.user_regs.ds = __USER_DS;
	ctxt.user_regs.es = __USER_DS;
	ctxt.user_regs.fs = 0;
	ctxt.user_regs.gs = 0;
	ctxt.user_regs.ss = __KERNEL_DS;
	ctxt.user_regs.cs = __KERNEL_CS;
	ctxt.user_regs.eip = start_eip;
	ctxt.user_regs.esp = idle->thread.esp;
#define X86_EFLAGS_IOPL_RING1 0x1000
	ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;

	/* FPU is set up to default initial state. */
	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	/* No LDT. */
	ctxt.ldt_ents = 0;

	{
		unsigned long va;
		int f;

		for (va = cpu_gdt_descr[cpu].address, f = 0;
		     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
		     va += PAGE_SIZE, f++) {
			ctxt.gdt_frames[f] = virt_to_mfn(va);
			make_page_readonly((void *)va);
		}
		ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
	}

	/* Ring 1 stack is the initial stack. */
	ctxt.kernel_ss = __KERNEL_DS;
	ctxt.kernel_sp = idle->thread.esp;

	/* Callback handlers. */
	ctxt.event_callback_cs     = __KERNEL_CS;
	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_cs  = __KERNEL_CS;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;

	boot_error = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt);
	if (boot_error)
		printk("boot error: %ld\n", boot_error);

	if (!boot_error) {
		HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);

		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			printk("CPU%d: ", cpu);
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
		}
	}
	x86_cpu_to_apicid[cpu] = apicid;
	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
	}

#else
	Dprintk("Setting warm reset code and vector.\n");

	store_NMI_vector(&nmi_high, &nmi_low);

	smpboot_setup_warm_reset_vector(start_eip);

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_cpu(apicid, start_eip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			printk("CPU%d: ", cpu);
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			if (*((volatile unsigned char *)trampoline_base)
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
			inquire_remote_apic(apicid);
		}
	}
	x86_cpu_to_apicid[cpu] = apicid;
	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
	}

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)trampoline_base) = 0;
#endif

	return boot_error;
}

static void smp_tune_scheduling (void)
{
	unsigned long cachesize;       /* kB   */
	unsigned long bandwidth = 350; /* MB/s */
	/*
	 * Rough estimation for SMP scheduling, this is the number of
	 * cycles it takes for a fully memory-limited process to flush
	 * the SMP-local cache.
	 *
	 * (For a P5 this pretty much means we will choose another idle
	 *  CPU almost always at wakeup time (this is due to the small
	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
	 *  the cache size)
	 */

	if (!cpu_khz) {
		/*
		 * this basically disables processor-affinity
		 * scheduling on SMP without a TSC.
		 */
		return;
	} else {
		cachesize = boot_cpu_data.x86_cache_size;
		if (cachesize == -1) {
			cachesize = 16; /* Pentiums, 2x8kB cache */
			bandwidth = 100;
		}
	}
}

/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

#if 0
static int boot_cpu_logical_apicid;
#endif
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;

cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);
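
/*
 * Boot-time bring-up: the number of vcpus the hypervisor has granted this
 * domain is read from the shared info page (n_vcpu) and each additional
 * vcpu is started in turn via do_boot_cpu().
 */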
static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int cpu, kicked;
	unsigned long bogosum = 0;
#if 0
	int apicid, bit;
#endif

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

#if 0
	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
	boot_cpu_logical_apicid = logical_smp_processor_id();
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
#else
	// boot_cpu_physical_apicid = 0;
	// boot_cpu_logical_apicid = 0;
	x86_cpu_to_apicid[0] = 0;
#endif

	current_thread_info()->cpu = 0;
	smp_tune_scheduling();
	cpus_clear(cpu_sibling_map[0]);
	cpu_set(0, cpu_sibling_map[0]);

	cpus_clear(cpu_core_map[0]);
	cpu_set(0, cpu_core_map[0]);

#ifdef CONFIG_X86_IO_APIC
	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		smpboot_clear_io_apic_irqs();
#if 0
		phys_cpu_present_map = physid_mask_of_physid(0);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
#endif
		map_cpu_to_logical_apicid();
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}
#endif

#if 0
	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_physical_apicid);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	verify_local_APIC();
#endif

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		HYPERVISOR_shared_info->n_vcpu = 1;
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		smpboot_clear_io_apic_irqs();
#if 0
		phys_cpu_present_map = physid_mask_of_physid(0);
#endif
		return;
	}

	smp_intr_init();

#if 0
	connect_bsp_APIC();
	setup_local_APIC();
#endif
	map_cpu_to_logical_apicid();

#if 0
	setup_portio_remap();

	/*
	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
	 *
	 * In clustered apic mode, phys_cpu_present_map is constructed thus:
	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
	 * clustered apic ID.
	 */
	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
#endif
	Dprintk("CPU present map: %lx\n",
		(1UL << HYPERVISOR_shared_info->n_vcpu) - 1);

	kicked = 1;
	for (cpu = 1; kicked < NR_CPUS &&
		     cpu < HYPERVISOR_shared_info->n_vcpu; cpu++) {
		if (max_cpus <= cpucount+1)
			continue;

#ifdef CONFIG_SMP_ALTERNATIVES
		if (kicked == 1)
			prepare_for_smp();
#endif
		if (do_boot_cpu(cpu))
			printk("CPU #%d not responding - cannot use it.\n",
								cpu);
		else
			++kicked;
	}

#if 0
	/*
	 * Cleanup possible dangling ends...
	 */
	smpboot_restore_warm_reset_vector();
#endif

	/*
	 * Allow the user to impress friends.
	 */
	Dprintk("Before bogomips.\n");
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_isset(cpu, cpu_callout_map))
			bogosum += cpu_data[cpu].loops_per_jiffy;
	printk(KERN_INFO
		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		cpucount+1,
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);

	Dprintk("Before bogocount - setting activated=1.\n");

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	if (tainted & TAINT_UNSAFE_SMP) {
		if (cpucount)
			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
		else
			tainted &= ~TAINT_UNSAFE_SMP;
	}

	Dprintk("Boot done.\n");

	/*
	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
	 * efficiently.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		struct cpuinfo_x86 *c = cpu_data + cpu;
		int siblings = 0;
		int i;
		if (!cpu_isset(cpu, cpu_callout_map))
			continue;

		if (smp_num_siblings > 1) {
			for (i = 0; i < NR_CPUS; i++) {
				if (!cpu_isset(i, cpu_callout_map))
					continue;
				if (cpu_core_id[cpu] == cpu_core_id[i]) {
					siblings++;
					cpu_set(i, cpu_sibling_map[cpu]);
				}
			}
		} else {
			siblings++;
			cpu_set(cpu, cpu_sibling_map[cpu]);
		}

		if (siblings != smp_num_siblings) {
			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
			smp_num_siblings = siblings;
		}

		if (c->x86_num_cores > 1) {
			for (i = 0; i < NR_CPUS; i++) {
				if (!cpu_isset(i, cpu_callout_map))
					continue;
				if (phys_proc_id[cpu] == phys_proc_id[i]) {
					cpu_set(i, cpu_core_map[cpu]);
				}
			}
		} else {
			cpu_core_map[cpu] = cpu_sibling_map[cpu];
		}
	}

	smpboot_setup_io_apic();

#if 0
	setup_boot_APIC_clock();

	/*
	 * Synchronize the TSC with the AP
	 */
	if (cpu_has_tsc && cpucount && cpu_khz)
		synchronize_tsc_bp();
#endif
}

/* These are wrappers to interface to the new boot process.  Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	smp_commenced_mask = cpumask_of_cpu(0);
	cpu_callin_map = cpumask_of_cpu(0);
	mb();
	smp_boot_cpus(max_cpus);
}

void __devinit smp_prepare_boot_cpu(void)
{
	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), cpu_callout_map);
}

#ifdef CONFIG_HOTPLUG_CPU
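
/*
 * Vcpu hotplug is driven from xenstore: the watch registered below fires
 * for writes under the "cpu" subtree.  The handler parses the vcpu number
 * out of a ".../cpu/<id>/..." path and then reads that vcpu's
 * "availability" node; "online" brings the vcpu up, "offline" takes it
 * down.
 */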
static void handle_vcpu_hotplug_event(
	struct xenbus_watch *watch, const char **vec, unsigned int len)
{
	int err, cpu;
	char dir[32], state[32];
	char *cpustr;
	const char *node = vec[XS_WATCH_PATH];

	if ((cpustr = strstr(node, "cpu/")) == NULL)
		return;

	sscanf(cpustr, "cpu/%d", &cpu);

	sprintf(dir, "cpu/%d", cpu);
	err = xenbus_scanf(NULL, dir, "availability", "%s", state);
	if (err != 1) {
		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
		return;
	}

	if (strcmp(state, "online") == 0)
		(void)cpu_up(cpu);
	else if (strcmp(state, "offline") == 0)
		(void)cpu_down(cpu);
	else
		printk(KERN_ERR "XENBUS: unknown state(%s) on node(%s)\n",
		       state, node);
}

static int setup_cpu_watcher(struct notifier_block *notifier,
			     unsigned long event, void *data)
{
	static struct xenbus_watch cpu_watch = {
		.node = "cpu",
		.callback = handle_vcpu_hotplug_event };
	(void)register_xenbus_watch(&cpu_watch);
	return NOTIFY_DONE;
}

static int __init setup_vcpu_hotplug_event(void)
{
	static struct notifier_block xsn_cpu = {
		.notifier_call = setup_cpu_watcher };
	register_xenstore_notifier(&xsn_cpu);
	return 0;
}

subsys_initcall(setup_vcpu_hotplug_event);

/* must be called with the cpucontrol mutex held */
static int __devinit cpu_enable(unsigned int cpu)
{
#ifdef CONFIG_SMP_ALTERNATIVES
	if (num_online_cpus() == 1)
		prepare_for_smp();
#endif

	/* get the target out of its holding state */
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
	wmb();

	/* wait for the processor to ack it. timeout? */
	while (!cpu_online(cpu))
		cpu_relax();

	fixup_irqs(cpu_online_map);

	/* counter the disable in fixup_irqs() */
	local_irq_enable();
	return 0;
}

int __cpu_disable(void)
{
	cpumask_t map = cpu_online_map;
	int cpu = smp_processor_id();

	/*
	 * Perhaps use cpufreq to drop frequency, but that could go
	 * into generic code.
	 *
	 * We won't take down the boot processor on i386 due to some
	 * interrupts only being able to be serviced by the BSP.
	 * Especially so if we're not using an IOAPIC	-zwane
	 */
	if (cpu == 0)
		return -EBUSY;

	cpu_clear(cpu, map);
	fixup_irqs(map);

	/* It's now safe to remove this processor from the online map */
	cpu_clear(cpu, cpu_online_map);

#ifdef CONFIG_SMP_ALTERNATIVES
	if (num_online_cpus() == 1)
		unprepare_for_smp();
#endif

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	/* We don't do anything here: idle task is faking death itself. */
	unsigned int i;

	for (i = 0; i < 10; i++) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
			return;
		current->state = TASK_UNINTERRUPTIBLE;
		schedule_timeout(HZ/10);
	}
	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}

#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
{
	return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

int __devinit __cpu_up(unsigned int cpu)
{
	/* In case one didn't come up */
	if (!cpu_isset(cpu, cpu_callin_map)) {
		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
		local_irq_enable();
		return -EIO;
	}

#ifdef CONFIG_HOTPLUG_CPU
#ifdef CONFIG_XEN
	/* Tell hypervisor to bring vcpu up. */
	HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
#endif
	/* Already up, and in cpu_quiescent now? */
	if (cpu_isset(cpu, smp_commenced_mask)) {
		cpu_enable(cpu);
		return 0;
	}
#endif

	local_irq_enable();
	/* Unleash the CPU! */
	cpu_set(cpu, smp_commenced_mask);
	while (!cpu_isset(cpu, cpu_online_map))
		mb();
	return 0;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
#if 1
#else
#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif
	zap_low_mappings();
	/*
	 * Disable executability of the SMP trampoline:
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
#endif
}

extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
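
/*
 * Under Xen, IPIs are delivered through per-CPU event channels rather than
 * the local APIC: each IPI vector is bound to a dynamic IRQ and a regular
 * interrupt handler is installed for it.
 */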
void smp_intr_init(void)
{
	int cpu = smp_processor_id();

	per_cpu(resched_irq, cpu) =
		bind_ipi_to_irq(RESCHEDULE_VECTOR);
	sprintf(resched_name[cpu], "resched%d", cpu);
	BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
	                   SA_INTERRUPT, resched_name[cpu], NULL));

	per_cpu(callfunc_irq, cpu) =
		bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
	sprintf(callfunc_name[cpu], "callfunc%d", cpu);
	BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
	                   smp_call_function_interrupt,
	                   SA_INTERRUPT, callfunc_name[cpu], NULL));
}

static void smp_intr_exit(void)
{
	int cpu = smp_processor_id();

	free_irq(per_cpu(resched_irq, cpu), NULL);
	unbind_ipi_from_irq(RESCHEDULE_VECTOR);

	free_irq(per_cpu(callfunc_irq, cpu), NULL);
	unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
}

extern void local_setup_timer_irq(void);
extern void local_teardown_timer_irq(void);
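
/*
 * Event-channel bindings do not survive a domain save/restore, so the
 * per-CPU timer and IPI IRQs are torn down before suspend and rebuilt on
 * resume.
 */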
void smp_suspend(void)
{
	local_teardown_timer_irq();
	smp_intr_exit();
}

void smp_resume(void)
{
	smp_intr_init();
	local_setup_timer();
}
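
/*
 * Rebuild the boot-time register state for a secondary vcpu (entry point
 * cpu_restore) and hand it back to the hypervisor with VCPUOP_initialise
 * followed by VCPUOP_up; the boot vcpu (0) is left untouched.
 */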
void vcpu_prepare(int vcpu)
{
	extern void hypervisor_callback(void);
	extern void failsafe_callback(void);
	extern void smp_trap_init(trap_info_t *);
	extern void cpu_restore(void);
	vcpu_guest_context_t ctxt;
	struct task_struct *idle = idle_task(vcpu);

	if (vcpu == 0)
		return;

	memset(&ctxt, 0, sizeof(ctxt));

	ctxt.user_regs.ds = __USER_DS;
	ctxt.user_regs.es = __USER_DS;
	ctxt.user_regs.fs = 0;
	ctxt.user_regs.gs = 0;
	ctxt.user_regs.ss = __KERNEL_DS;
	ctxt.user_regs.cs = __KERNEL_CS;
	ctxt.user_regs.eip = (unsigned long)cpu_restore;
	ctxt.user_regs.esp = idle->thread.esp;
	ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING1;

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;

	ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[vcpu].address);
	ctxt.gdt_ents = cpu_gdt_descr[vcpu].size / 8;

	ctxt.kernel_ss = __KERNEL_DS;
	ctxt.kernel_sp = idle->thread.esp0;

	ctxt.event_callback_cs     = __KERNEL_CS;
	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_cs  = __KERNEL_CS;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;

	(void)HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt);
	(void)HYPERVISOR_vcpu_op(VCPUOP_up, vcpu, NULL);
}

/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */