ia64/xen-unstable

view xen/arch/ia64/xen/xensetup.c @ 15154:b46c2ff6dfb0

[IA64] Fix initialization order for buddy allocator

Fix the initialization order of the buddy allocator to avoid a panic
on machines with multiple NUMA nodes.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
author Alex Williamson <alex.williamson@hp.com>
date Thu May 31 14:47:08 2007 -0600 (2007-05-31)
parents 90f19b7667f7
children cbf749e9961f
line source
1 /******************************************************************************
2 * xensetup.c
3 * Copyright (c) 2004-2005 Hewlett-Packard Co
4 * Dan Magenheimer <dan.magenheimer@hp.com>
5 */
7 #include <xen/config.h>
8 #include <xen/lib.h>
9 #include <xen/errno.h>
10 //#include <xen/spinlock.h>
11 #include <xen/multiboot.h>
12 #include <xen/sched.h>
13 #include <xen/mm.h>
14 #include <public/version.h>
15 #include <xen/gdbstub.h>
16 #include <xen/version.h>
17 #include <xen/console.h>
18 #include <xen/domain.h>
19 #include <xen/serial.h>
20 #include <xen/trace.h>
21 #include <xen/keyhandler.h>
22 #include <asm/meminit.h>
23 #include <asm/page.h>
24 #include <asm/setup.h>
25 #include <xen/string.h>
26 #include <asm/vmx.h>
27 #include <linux/efi.h>
28 #include <asm/iosapic.h>
29 #include <xen/softirq.h>
30 #include <xen/rcupdate.h>
31 #include <acm/acm_hooks.h>
32 #include <asm/sn/simulator.h>
34 unsigned long xenheap_phys_end, total_pages;
36 char saved_command_line[COMMAND_LINE_SIZE];
37 char __initdata dom0_command_line[COMMAND_LINE_SIZE];
39 cpumask_t cpu_present_map;
41 extern unsigned long domain0_ready;
43 int find_max_pfn (unsigned long, unsigned long, void *);
45 /* FIXME: which header these declarations should be there ? */
46 extern long is_platform_hp_ski(void);
47 extern void early_setup_arch(char **);
48 extern void late_setup_arch(char **);
49 extern void hpsim_serial_init(void);
50 extern void alloc_dom0(void);
51 extern void setup_per_cpu_areas(void);
52 extern void mem_init(void);
53 extern void init_IRQ(void);
54 extern void trap_init(void);
55 extern void xen_patch_kernel(void);
57 /* opt_nosmp: If true, secondary processors are ignored. */
58 static int opt_nosmp;
59 boolean_param("nosmp", opt_nosmp);
61 /* maxcpus: maximum number of CPUs to activate. */
62 static unsigned int __initdata max_cpus = NR_CPUS;
63 integer_param("maxcpus", max_cpus);
65 /* xencons: if true enable xenconsole input (and irq).
66 Note: you have to disable 8250 serials in domains (to avoid use of the
67 same resource). */
68 static int __initdata opt_xencons = 1;
69 integer_param("xencons", opt_xencons);
71 /* Toggle to allow non-legacy xencons UARTs to run in polling mode */
72 static int __initdata opt_xencons_poll;
73 boolean_param("xencons_poll", opt_xencons_poll);
75 /*
76 * opt_xenheap_megabytes: Size of Xen heap in megabytes, including:
77 * xen image
78 * bootmap bits
79 * xen heap
80 * Note: To allow xenheap size configurable, the prerequisite is
81 * to configure elilo allowing relocation defaultly. Then since
82 * elilo chooses 256M as alignment when relocating, alignment issue
83 * on IPF can be addressed.
84 */
85 unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
86 unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE;
87 extern long running_on_sim;
88 unsigned long xen_pstart;
89 void *xen_pickle_offset __read_mostly;
91 static int __init
92 xen_count_pages(u64 start, u64 end, void *arg)
93 {
94 unsigned long *count = arg;
96 /* FIXME: do we need consider difference between DMA-usable memory and
97 * normal memory? Seems that HV has no requirement to operate DMA which
98 * is owned by Dom0? */
99 *count += (end - start) >> PAGE_SHIFT;
100 return 0;
101 }
103 static void __init do_initcalls(void)
104 {
105 initcall_t *call;
106 for ( call = &__initcall_start; call < &__initcall_end; call++ )
107 (*call)();
108 }
110 /*
111 * IPF loader only supports one commaind line currently, for
112 * both xen and guest kernel. This function provides pre-parse
113 * to mixed command line, to split it into two parts.
114 *
115 * User should split the parameters by "--", with strings after
116 * spliter for guest kernel. Missing "--" means whole line belongs
117 * to guest. Example:
118 * "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty
119 * root=/dev/sda3 ro"
120 */
121 static char null[4] = { 0 };
123 void __init early_cmdline_parse(char **cmdline_p)
124 {
125 char *guest_cmd;
126 static const char * const split = "--";
128 if (*cmdline_p == NULL) {
129 *cmdline_p = &null[0];
130 saved_command_line[0] = '\0';
131 dom0_command_line[0] = '\0';
132 return;
133 }
135 guest_cmd = strstr(*cmdline_p, split);
136 /* If no spliter, whole line is for guest */
137 if (guest_cmd == NULL) {
138 guest_cmd = *cmdline_p;
139 *cmdline_p = &null[0];
140 } else {
141 *guest_cmd = '\0'; /* Split boot parameters for xen and guest */
142 guest_cmd += strlen(split);
143 while (*guest_cmd == ' ') guest_cmd++;
144 }
146 strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
147 strlcpy(dom0_command_line, guest_cmd, COMMAND_LINE_SIZE);
148 return;
149 }
151 struct ns16550_defaults ns16550_com1 = {
152 .data_bits = 8,
153 .parity = 'n',
154 .stop_bits = 1
155 };
157 unsigned int ns16550_com1_gsi;
158 unsigned int ns16550_com1_polarity;
159 unsigned int ns16550_com1_trigger;
161 struct ns16550_defaults ns16550_com2 = {
162 .data_bits = 8,
163 .parity = 'n',
164 .stop_bits = 1
165 };
167 /* efi_print: print efi table at boot */
168 static int __initdata opt_efi_print;
169 boolean_param("efi_print", opt_efi_print);
171 /* print EFI memory map: */
172 static void __init
173 efi_print(void)
174 {
175 void *efi_map_start, *efi_map_end;
176 u64 efi_desc_size;
178 efi_memory_desc_t *md;
179 void *p;
180 int i;
182 if (!opt_efi_print)
183 return;
185 efi_map_start = __va(ia64_boot_param->efi_memmap);
186 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
187 efi_desc_size = ia64_boot_param->efi_memdesc_size;
189 for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
190 md = p;
191 printk("mem%02u: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) "
192 "(%luMB)\n", i, md->type, md->attribute, md->phys_addr,
193 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
194 md->num_pages >> (20 - EFI_PAGE_SHIFT));
195 }
196 }
198 /*
199 * These functions are utility functions for getting and
200 * testing memory descriptors for allocating the xenheap area.
201 */
202 static efi_memory_desc_t *
203 efi_get_md (unsigned long phys_addr)
204 {
205 void *efi_map_start, *efi_map_end, *p;
206 efi_memory_desc_t *md;
207 u64 efi_desc_size;
209 efi_map_start = __va(ia64_boot_param->efi_memmap);
210 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
211 efi_desc_size = ia64_boot_param->efi_memdesc_size;
213 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
214 md = p;
215 if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
216 return md;
217 }
218 return 0;
219 }
221 static int
222 is_xenheap_usable_memory(efi_memory_desc_t *md)
223 {
224 if (!(md->attribute & EFI_MEMORY_WB))
225 return 0;
227 switch (md->type) {
228 case EFI_LOADER_CODE:
229 case EFI_LOADER_DATA:
230 case EFI_BOOT_SERVICES_CODE:
231 case EFI_BOOT_SERVICES_DATA:
232 case EFI_CONVENTIONAL_MEMORY:
233 return 1;
234 }
235 return 0;
236 }
238 static inline int
239 md_overlaps(efi_memory_desc_t *md, unsigned long phys_addr)
240 {
241 return (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT));
242 }
/*
 * Size in bytes of the range described by an EFI memory descriptor.
 * The argument is parenthesized so that pointer expressions such as
 * MD_SIZE(p + 1) expand correctly.
 */
#define MD_SIZE(md) ((md)->num_pages << EFI_PAGE_SHIFT)
246 extern char __init_begin[], __init_end[];
247 static void noinline init_done(void)
248 {
249 memset(__init_begin, 0, __init_end - __init_begin);
250 flush_icache_range((unsigned long)__init_begin, (unsigned long)__init_end);
251 init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
252 printk("Freed %ldkB init memory.\n",
253 (long)(__init_end-__init_begin)>>10);
255 startup_cpu_idle_loop();
256 }
258 void __init start_kernel(void)
259 {
260 char *cmdline;
261 unsigned long nr_pages;
262 unsigned long dom0_memory_start, dom0_memory_size;
263 unsigned long dom0_initrd_start, dom0_initrd_size;
264 unsigned long md_end, relo_start, relo_end, relo_size = 0;
265 struct domain *idle_domain;
266 struct vcpu *dom0_vcpu0;
267 efi_memory_desc_t *kern_md, *last_md, *md;
268 void *xen_heap_start;
269 #ifdef CONFIG_SMP
270 int i;
271 #endif
273 /* Be sure the struct shared_info size is <= XSI_SIZE. */
274 BUILD_BUG_ON(sizeof(struct shared_info) > XSI_SIZE);
276 running_on_sim = is_platform_hp_ski();
277 /* Kernel may be relocated by EFI loader */
278 xen_pstart = ia64_tpa(KERNEL_START);
280 early_setup_arch(&cmdline);
282 /* We initialise the serial devices very early so we can get debugging. */
283 if (running_on_sim)
284 hpsim_serial_init();
285 else {
286 ns16550_init(0, &ns16550_com1);
287 ns16550_init(1, &ns16550_com2);
288 }
289 serial_init_preirq();
291 init_console();
292 set_printk_prefix("(XEN) ");
294 if (running_on_sim || ia64_boot_param->domain_start == 0 ||
295 ia64_boot_param->domain_size == 0) {
296 /* This is possible only with the old elilo, which does not support
297 a vmm. Fix now, and continue without initrd. */
298 printk ("Your elilo is not Xen-aware. Bootparams fixed\n");
299 ia64_boot_param->domain_start = ia64_boot_param->initrd_start;
300 ia64_boot_param->domain_size = ia64_boot_param->initrd_size;
301 ia64_boot_param->initrd_start = 0;
302 ia64_boot_param->initrd_size = 0;
303 }
305 printk("Xen command line: %s\n", saved_command_line);
306 /* xenheap should be in same TR-covered range with xen image */
307 xenheap_phys_end = xen_pstart + xenheap_size;
308 printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
309 xen_pstart, xenheap_phys_end);
311 xen_patch_kernel();
313 kern_md = md = efi_get_md(xen_pstart);
314 md_end = __pa(ia64_imva(&_end));
315 relo_start = xenheap_phys_end;
317 /*
318 * Scan through the memory descriptors after the kernel
319 * image to make sure we have enough room for the xenheap
320 * area, pushing out whatever may already be there.
321 */
322 while (relo_start + relo_size >= md_end) {
323 md = efi_get_md(md_end);
325 BUG_ON(!md);
326 BUG_ON(!is_xenheap_usable_memory(md));
328 md_end = md->phys_addr + MD_SIZE(md);
329 /*
330 * The dom0 kernel or initrd could overlap, reserve space
331 * at the end to relocate them later.
332 */
333 if (md->type == EFI_LOADER_DATA) {
334 /* Test for ranges we're not prepared to move */
335 BUG_ON(md_overlaps(md, __pa(ia64_boot_param)) ||
336 md_overlaps(md, ia64_boot_param->efi_memmap) ||
337 md_overlaps(md, ia64_boot_param->command_line));
339 relo_size += MD_SIZE(md);
340 /* If range overlaps the end, push out the relocation start */
341 if (md_end > relo_start)
342 relo_start = md_end;
343 }
344 }
345 last_md = md;
346 relo_end = relo_start + relo_size;
348 md_end = __pa(ia64_imva(&_end));
350 /*
351 * Move any relocated data out into the previously found relocation
352 * area. Any extra memory descriptrs are moved out to the end
353 * and set to zero pages.
354 */
355 for (md = efi_get_md(md_end) ;; md = efi_get_md(md_end)) {
356 md_end = md->phys_addr + MD_SIZE(md);
358 if (md->type == EFI_LOADER_DATA) {
359 unsigned long relo_offset;
361 if (md_overlaps(md, ia64_boot_param->domain_start)) {
362 relo_offset = ia64_boot_param->domain_start - md->phys_addr;
363 printk("Moving Dom0 kernel image: 0x%lx -> 0x%lx (%ld KiB)\n",
364 ia64_boot_param->domain_start, relo_start + relo_offset,
365 ia64_boot_param->domain_size >> 10);
366 ia64_boot_param->domain_start = relo_start + relo_offset;
367 }
368 if (ia64_boot_param->initrd_size &&
369 md_overlaps(md, ia64_boot_param->initrd_start)) {
370 relo_offset = ia64_boot_param->initrd_start - md->phys_addr;
371 printk("Moving Dom0 initrd image: 0x%lx -> 0x%lx (%ld KiB)\n",
372 ia64_boot_param->initrd_start, relo_start + relo_offset,
373 ia64_boot_param->initrd_size >> 10);
374 ia64_boot_param->initrd_start = relo_start + relo_offset;
375 }
376 memcpy(__va(relo_start), __va(md->phys_addr), MD_SIZE(md));
377 relo_start += MD_SIZE(md);
378 }
380 if (md == kern_md)
381 continue;
382 if (md == last_md)
383 break;
385 md->phys_addr = relo_end;
386 md->num_pages = 0;
387 }
389 /* Trim the last entry */
390 md->phys_addr = relo_end;
391 md->num_pages = (md_end - relo_end) >> EFI_PAGE_SHIFT;
393 /*
394 * Expand the new kernel/xenheap (and maybe dom0/initrd) out to
395 * the full size. This range will already be type EFI_LOADER_DATA,
396 * therefore the xenheap area is now protected being allocated for
397 * use by find_memmap_space() in efi.c
398 */
399 kern_md->num_pages = (relo_end - kern_md->phys_addr) >> EFI_PAGE_SHIFT;
401 reserve_memory();
403 /* first find highest page frame number */
404 max_page = 0;
405 efi_memmap_walk(find_max_pfn, &max_page);
406 printk("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
407 efi_print();
409 /*
410 * later [__init_begin, __init_end) will be freed up as xen heap
411 * so that struct domain might be allocated from the init area
412 * which is < xen_heap_start. so we can't simply set
413 * xen_pickle_offset = xen_heap_start.
414 */
415 xen_pickle_offset = ia64_imva(__init_begin);
417 xen_heap_start = memguard_init(ia64_imva(&_end));
418 printk("Before xen_heap_start: %p\n", xen_heap_start);
419 xen_heap_start = __va(init_boot_allocator(__pa(xen_heap_start)));
420 printk("After xen_heap_start: %p\n", xen_heap_start);
422 efi_memmap_walk(filter_rsvd_memory, init_boot_pages);
423 efi_memmap_walk(xen_count_pages, &nr_pages);
425 printk("System RAM: %luMB (%lukB)\n",
426 nr_pages >> (20 - PAGE_SHIFT),
427 nr_pages << (PAGE_SHIFT - 10));
428 total_pages = nr_pages;
430 init_frametable();
432 trap_init();
434 alloc_dom0();
436 init_xenheap_pages(__pa(xen_heap_start), xenheap_phys_end);
437 printk("Xen heap: %luMB (%lukB)\n",
438 (xenheap_phys_end-__pa(xen_heap_start)) >> 20,
439 (xenheap_phys_end-__pa(xen_heap_start)) >> 10);
441 end_boot_allocator();
443 late_setup_arch(&cmdline);
445 scheduler_init();
446 idle_vcpu[0] = (struct vcpu*) ia64_r13;
447 idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
448 if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
449 BUG();
451 alloc_dom_xen_and_dom_io();
452 setup_per_cpu_areas();
453 mem_init();
455 local_irq_disable();
456 init_IRQ ();
457 init_xen_time(); /* initialise the time */
458 timer_init();
460 rcu_init();
462 #ifdef CONFIG_XEN_IA64_TLBFLUSH_CLOCK
463 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
464 #endif
466 #ifdef CONFIG_SMP
467 if ( opt_nosmp )
468 {
469 max_cpus = 0;
470 smp_num_siblings = 1;
471 //boot_cpu_data.x86_num_cores = 1;
472 }
474 /* A vcpu is created for the idle domain on every physical cpu.
475 Limit the number of cpus to the maximum number of vcpus. */
476 if (max_cpus > MAX_VIRT_CPUS)
477 max_cpus = MAX_VIRT_CPUS;
479 smp_prepare_cpus(max_cpus);
481 /* We aren't hotplug-capable yet. */
482 for_each_cpu ( i )
483 cpu_set(i, cpu_present_map);
485 /* Enable IRQ to receive IPI (needed for ITC sync). */
486 local_irq_enable();
488 printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus);
489 for_each_present_cpu ( i )
490 {
491 if ( num_online_cpus() >= max_cpus )
492 break;
493 if ( !cpu_online(i) ) {
494 rcu_online_cpu(i);
495 __cpu_up(i);
496 }
497 }
499 local_irq_disable();
501 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
502 smp_cpus_done(max_cpus);
503 #endif
505 initialise_gdb(); /* could be moved earlier */
507 do_initcalls();
508 sort_main_extable();
510 init_rid_allocator ();
512 local_irq_enable();
514 if (opt_xencons) {
515 initialize_keytable();
516 if (ns16550_com1_gsi) {
517 if (opt_xencons_poll ||
518 iosapic_register_intr(ns16550_com1_gsi,
519 ns16550_com1_polarity,
520 ns16550_com1_trigger) < 0) {
521 ns16550_com1.irq = 0;
522 ns16550_init(0, &ns16550_com1);
523 }
524 }
525 serial_init_postirq();
527 /* Hide the HCDP table from dom0 */
528 efi.hcdp = NULL;
529 }
531 expose_p2m_init();
533 /* Create initial domain 0. */
534 dom0 = domain_create(0, 0, DOM0_SSIDREF);
535 if (dom0 == NULL)
536 panic("Error creating domain 0\n");
537 dom0_vcpu0 = alloc_vcpu(dom0, 0, 0);
538 if (dom0_vcpu0 == NULL || vcpu_late_initialise(dom0_vcpu0) != 0)
539 panic("Cannot allocate dom0 vcpu 0\n");
541 dom0->is_privileged = 1;
543 /*
544 * We're going to setup domain0 using the module(s) that we stashed safely
545 * above our heap. The second module, if present, is an initrd ramdisk.
546 */
547 dom0_memory_start = (unsigned long) __va(ia64_boot_param->domain_start);
548 dom0_memory_size = ia64_boot_param->domain_size;
549 dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start);
550 dom0_initrd_size = ia64_boot_param->initrd_size;
552 if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_size,
553 dom0_initrd_start,dom0_initrd_size,
554 0) != 0)
555 panic("Could not set up DOM0 guest OS\n");
557 if (!running_on_sim && !IS_MEDUSA()) // slow on ski and pages are pre-initialized to zero
558 scrub_heap_pages();
560 init_trace_bufs();
562 if (opt_xencons) {
563 console_endboot();
564 serial_endboot();
565 }
567 domain0_ready = 1;
569 domain_unpause_by_systemcontroller(dom0);
571 init_done();
572 }
574 void arch_get_xen_caps(xen_capabilities_info_t *info)
575 {
576 /* Interface name is always xen-3.0-* for Xen-3.x. */
577 int major = 3, minor = 0;
578 char s[32];
580 (*info)[0] = '\0';
582 snprintf(s, sizeof(s), "xen-%d.%d-ia64 ", major, minor);
583 safe_strcat(*info, s);
585 snprintf(s, sizeof(s), "xen-%d.%d-ia64be ", major, minor);
586 safe_strcat(*info, s);
588 if (vmx_enabled)
589 {
590 snprintf(s, sizeof(s), "hvm-%d.%d-ia64 ", major, minor);
591 safe_strcat(*info, s);
592 }
593 }