ia64/xen-unstable: xen/arch/x86/setup.c @ 9878:ef0a56c0784d

Add BUILD_BUG_ON() and a handful of users.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Author: kaf24@firebug.cl.cam.ac.uk
Date:   Thu Apr 27 15:55:09 2006 +0100
Parents: 428babd7c1e0
Children: 4122e88b6c75

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/serial.h>
#include <xen/softirq.h>
#include <xen/acpi.h>
#include <xen/console.h>
#include <xen/serial.h>
#include <xen/trace.h>
#include <xen/multiboot.h>
#include <xen/domain_page.h>
#include <xen/compile.h>
#include <xen/gdbstub.h>
#include <xen/percpu.h>
#include <public/version.h>
#include <asm/bitops.h>
#include <asm/smp.h>
#include <asm/processor.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/shadow.h>
#include <asm/e820.h>
#include <acm/acm_hooks.h>

extern void dmi_scan_machine(void);
extern void generic_apic_probe(void);

/*
 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
 * page_info table and allocation bitmap.
 */
static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
#if defined(CONFIG_X86_64)
integer_param("xenheap_megabytes", opt_xenheap_megabytes);
#endif

/* opt_nosmp: If true, secondary processors are ignored. */
static int opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);

/* maxcpus: maximum number of CPUs to activate. */
static unsigned int max_cpus = NR_CPUS;
integer_param("maxcpus", max_cpus);

/* opt_watchdog: If true, run a watchdog NMI on each processor. */
static int opt_watchdog = 0;
boolean_param("watchdog", opt_watchdog);

/* **** Linux config option: propagated to domain0. */
/* "acpi=off":    Disables both ACPI table parsing and interpreter. */
/* "acpi=force":  Override the disable blacklist. */
/* "acpi=strict": Disables out-of-spec workarounds. */
/* "acpi=ht":     Limit ACPI just to boot-time to enable HT. */
/* "acpi=noirq":  Disables ACPI interrupt routing. */
static void parse_acpi_param(char *s);
custom_param("acpi", parse_acpi_param);

/* **** Linux config option: propagated to domain0. */
/* acpi_skip_timer_override: Skip IRQ0 overrides. */
extern int acpi_skip_timer_override;
boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);

/* **** Linux config option: propagated to domain0. */
/* noapic: Disable IOAPIC setup. */
extern int skip_ioapic_setup;
boolean_param("noapic", skip_ioapic_setup);

int early_boot = 1;

cpumask_t cpu_present_map;

/* Limits of Xen heap, used to initialise the allocator. */
unsigned long xenheap_phys_start, xenheap_phys_end;

extern void arch_init_memory(void);
extern void init_IRQ(void);
extern void trap_init(void);
extern void early_time_init(void);
extern void initialize_keytable(void);
extern void early_cpu_init(void);

struct tss_struct init_tss[NR_CPUS];

struct vcpu *idle_vcpu[NR_CPUS];

extern unsigned long cpu0_stack[];

struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };

#if CONFIG_PAGING_LEVELS > 2
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE;
#endif
EXPORT_SYMBOL(mmu_cr4_features);

int acpi_disabled;

int acpi_force;
char acpi_param[10] = "";
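
/*
 * Record the "acpi=" boot parameter for propagation to domain0, and
 * interpret it for Xen itself (see the option summary above).
 */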
static void parse_acpi_param(char *s)
{
    /* Save the parameter so it can be propagated to domain0. */
    strncpy(acpi_param, s, sizeof(acpi_param));
    acpi_param[sizeof(acpi_param)-1] = '\0';

    /* Interpret the parameter for use within Xen. */
    if ( !strcmp(s, "off") )
    {
        disable_acpi();
    }
    else if ( !strcmp(s, "force") )
    {
        acpi_force = 1;
        acpi_ht = 1;
        acpi_disabled = 0;
    }
    else if ( !strcmp(s, "strict") )
    {
        acpi_strict = 1;
    }
    else if ( !strcmp(s, "ht") )
    {
        if ( !acpi_force )
            disable_acpi();
        acpi_ht = 1;
    }
    else if ( !strcmp(s, "noirq") )
    {
        acpi_noirq_set();
    }
}

static void __init do_initcalls(void)
{
    initcall_t *call;
    for ( call = &__initcall_start; call < &__initcall_end; call++ )
        (*call)();
}
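
/* Halt the boot CPU forever: used on fatal errors before the normal panic path is usable. */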
#define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )

static struct e820entry e820_raw[E820MAX];

static unsigned long initial_images_start, initial_images_end;
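
/* Number of whole pages spanned by the stashed DOM0 boot modules. */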
unsigned long initial_images_nrpages(void)
{
    unsigned long s = initial_images_start + PAGE_SIZE - 1;
    unsigned long e = initial_images_end;
    return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
}

void discard_initial_images(void)
{
    init_domheap_pages(initial_images_start, initial_images_end);
}

extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
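
/* Replicate CPU0's static per-CPU data into every other CPU's per-CPU area. */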
static void percpu_init_areas(void)
{
    unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;

    BUG_ON(data_size > PERCPU_SIZE);

    for ( i = 1; i < NR_CPUS; i++ )
        memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
               __per_cpu_start,
               data_size);
}
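
/* Return the per-CPU areas of CPUs that never came online to the Xen heap. */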
static void percpu_free_unused_areas(void)
{
    unsigned int i, first_unused;

    /* Find first unused CPU number. */
    for ( i = 0; i < NR_CPUS; i++ )
        if ( !cpu_online(i) )
            break;
    first_unused = i;

    /* Check that there are no holes in cpu_online_map. */
    for ( ; i < NR_CPUS; i++ )
        BUG_ON(cpu_online(i));

    init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
                       __pa(__per_cpu_end));
}
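
/*
 * Main C entry point for the boot processor.  mbi is the Multiboot
 * information structure passed in by the bootloader via the boot assembly.
 */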
void __init __start_xen(multiboot_info_t *mbi)
{
    char __cmdline[] = "", *cmdline = __cmdline;
    struct domain *idle_domain;
    unsigned long _initrd_start = 0, _initrd_len = 0;
    unsigned int initrdidx = 1;
    module_t *mod = (module_t *)__va(mbi->mods_addr);
    unsigned long nr_pages, modules_length;
    paddr_t s, e;
    int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
    struct ns16550_defaults ns16550 = {
        .data_bits = 8,
        .parity    = 'n',
        .stop_bits = 1
    };

    /* Parse the command-line options. */
    if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
        cmdline = __va(mbi->cmdline);
    cmdline_parse(cmdline);

    set_current((struct vcpu *)0xfffff000); /* debug sanity */
    set_processor_id(0); /* needed early, for smp_processor_id() */

    smp_prepare_boot_cpu();

    /* We initialise the serial devices very early so we can get debugging. */
    ns16550.io_base = 0x3f8;
    ns16550.irq     = 4;
    ns16550_init(0, &ns16550);
    ns16550.io_base = 0x2f8;
    ns16550.irq     = 3;
    ns16550_init(1, &ns16550);
    serial_init_preirq();

    init_console();

    printf("Command line: %s\n", cmdline);

    /* Check that we have at least one Multiboot module. */
    if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
    {
        printk("FATAL ERROR: dom0 kernel not specified."
               " Check bootloader configuration.\n");
        EARLY_FAIL();
    }

    if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
    {
        printk("FATAL ERROR: Misaligned CPU0 stack.\n");
        EARLY_FAIL();
    }

    percpu_init_areas();
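
    /*
     * xenheap_phys_end is derived from the xenheap_megabytes option; the
     * heap's start address is fixed up later, once the boot allocator has
     * been initialised.
     */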
    xenheap_phys_end = opt_xenheap_megabytes << 20;

    if ( mbi->flags & MBI_MEMMAP )
    {
        while ( bytes < mbi->mmap_length )
        {
            memory_map_t *map = __va(mbi->mmap_addr + bytes);

            /*
             * This is a gross workaround for a BIOS bug. Some bootloaders do
             * not write e820 map entries into pre-zeroed memory. This is
             * okay if the BIOS fills in all fields of the map entry, but
             * some broken BIOSes do not bother to write the high word of
             * the length field if the length is smaller than 4GB. We
             * detect and fix this by flagging sections below 4GB that
             * appear to be larger than 4GB in size.
             */
            if ( (map->base_addr_high == 0) && (map->length_high != 0) )
            {
                e820_warn = 1;
                map->length_high = 0;
            }

            e820_raw[e820_raw_nr].addr =
                ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
            e820_raw[e820_raw_nr].size =
                ((u64)map->length_high << 32) | (u64)map->length_low;
            e820_raw[e820_raw_nr].type =
                (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
            e820_raw_nr++;

            bytes += map->size + 4;
        }
    }
    else if ( mbi->flags & MBI_MEMLIMITS )
    {
        e820_raw[0].addr = 0;
        e820_raw[0].size = mbi->mem_lower << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = mbi->mem_upper << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else
    {
        printk("FATAL ERROR: Bootloader provided no memory information.\n");
        for ( ; ; ) ;
    }

    if ( e820_warn )
        printk("WARNING: Buggy e820 map detected and fixed "
               "(truncated length fields).\n");
    max_page = init_e820(e820_raw, &e820_raw_nr);

    modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;

    /* Find a large enough RAM extent to stash the DOM0 modules. */
    for ( i = 0; ; i++ )
    {
        if ( i == e820.nr_map )
        {
            printk("Not enough memory to stash the DOM0 kernel image.\n");
            for ( ; ; ) ;
        }

        if ( (e820.map[i].type == E820_RAM) &&
             (e820.map[i].size >= modules_length) &&
             ((e820.map[i].addr + e820.map[i].size) >=
              (xenheap_phys_end + modules_length)) )
            break;
    }

    /* Stash as near as possible to the beginning of the RAM extent. */
    initial_images_start = e820.map[i].addr;
    if ( initial_images_start < xenheap_phys_end )
        initial_images_start = xenheap_phys_end;
    initial_images_end = initial_images_start + modules_length;

#if defined(CONFIG_X86_32)
    memmove((void *)initial_images_start,  /* use low mapping */
            (void *)mod[0].mod_start,      /* use low mapping */
            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#elif defined(CONFIG_X86_64)
    memmove(__va(initial_images_start),
            __va(mod[0].mod_start),
            mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
#endif
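
    /*
     * From here on the module images live at initial_images_start; the
     * memory holding the bootloader's original copies can be reused.
     */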
    /* Initialise boot-time allocator with all RAM situated after modules. */
    xenheap_phys_start = init_boot_allocator(__pa(&_end));
    nr_pages = 0;
    for ( i = 0; i < e820.nr_map; i++ )
    {
        if ( e820.map[i].type != E820_RAM )
            continue;

        nr_pages += e820.map[i].size >> PAGE_SHIFT;

        /* Initialise boot heap, skipping Xen heap and dom0 modules. */
        s = e820.map[i].addr;
        e = s + e820.map[i].size;
        if ( s < xenheap_phys_end )
            s = xenheap_phys_end;
        if ( (s < initial_images_end) && (e > initial_images_start) )
            s = initial_images_end;
        init_boot_pages(s, e);

#if defined (CONFIG_X86_64)
        /*
         * x86/64 maps all registered RAM. Points to note:
         * 1. The initial pagetable already maps low 1GB, so skip that.
         * 2. We must map *only* RAM areas, taking care to avoid I/O holes.
         *    Failure to do this can cause coherency problems and deadlocks
         *    due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
         */
        {
            /* Calculate page-frame range, discarding partial frames. */
            unsigned long start, end;
            unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
            start = PFN_UP(e820.map[i].addr);
            end   = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
            /* Clip the range to exclude what the bootstrapper initialised. */
            if ( end < init_mapped )
                continue;
            if ( start < init_mapped )
                start = init_mapped;
            /* Request the mapping. */
            map_pages_to_xen(
                PAGE_OFFSET + (start << PAGE_SHIFT),
                start, end-start, PAGE_HYPERVISOR);
        }
#endif
    }

    memguard_init();

    printk("System RAM: %luMB (%lukB)\n",
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));
    total_pages = nr_pages;
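
    /*
     * BUILD_BUG_ON(cond) is a compile-time assertion: it breaks the build
     * if 'cond' is true and generates no code otherwise.  A minimal sketch
     * of such a macro (the actual definition in xen/lib.h may differ):
     *
     *   #define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))
     *
     * The checks below catch ABI-visible structures growing past their
     * intended sizes.
     */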
    /* Sanity check for unwanted bloat of dom0_op structure. */
    BUILD_BUG_ON(sizeof(((struct dom0_op *)0)->u) !=
                 sizeof(((struct dom0_op *)0)->u.pad));

    BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(vcpu_info_t) != 64);

    /* __foo are defined in public headers. Check they match internal defs. */
    BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
#ifdef HYPERVISOR_VIRT_END
    BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
#endif

    init_frametable();

    end_boot_allocator();

    /* Initialise the Xen heap, skipping RAM holes. */
    nr_pages = 0;
    for ( i = 0; i < e820.nr_map; i++ )
    {
        if ( e820.map[i].type != E820_RAM )
            continue;

        s = e820.map[i].addr;
        e = s + e820.map[i].size;
        if ( s < xenheap_phys_start )
            s = xenheap_phys_start;
        if ( e > xenheap_phys_end )
            e = xenheap_phys_end;

        if ( s < e )
        {
            nr_pages += (e - s) >> PAGE_SHIFT;
            init_xenheap_pages(s, e);
        }
    }

    printk("Xen heap: %luMB (%lukB)\n",
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));

    early_boot = 0;

    early_cpu_init();

    scheduler_init();

    idle_domain = domain_create(IDLE_DOMAIN_ID, 0);
    BUG_ON(idle_domain == NULL);

    set_current(idle_domain->vcpu[0]);
    this_cpu(curr_vcpu) = idle_domain->vcpu[0];
    idle_vcpu[0] = current;

    paging_init();

    /* Unmap the first page of CPU0's stack. */
    memguard_guard_stack(cpu0_stack);

    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);

    if ( opt_watchdog )
        nmi_watchdog = NMI_LOCAL_APIC;

    sort_exception_tables();

    find_smp_config();

    smp_alloc_memory();

    dmi_scan_machine();

    generic_apic_probe();

    acpi_boot_table_init();
    acpi_boot_init();

    if ( smp_found_config )
        get_smp_config();

    init_apic_mappings();

    init_IRQ();

    trap_init();

    timer_init();

    early_time_init();

    arch_init_memory();

    identify_cpu(&boot_cpu_data);
    if ( cpu_has_fxsr )
        set_in_cr4(X86_CR4_OSFXSR);
    if ( cpu_has_xmm )
        set_in_cr4(X86_CR4_OSXMMEXCPT);

    if ( opt_nosmp )
        max_cpus = 0;

    smp_prepare_cpus(max_cpus);

    /*
     * Initialise higher-level timer functions. We do this fairly late
     * (post-SMP) because the time bases and scale factors need to be updated
     * regularly, and SMP initialisation can cause a long delay with
     * interrupts not yet enabled.
     */
    init_xen_time();

    initialize_keytable();

    serial_init_postirq();

    BUG_ON(!local_irq_is_enabled());
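
    /* Bring up the remaining CPUs, up to the maxcpus= limit. */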
    for_each_present_cpu ( i )
    {
        if ( num_online_cpus() >= max_cpus )
            break;
        if ( !cpu_online(i) )
            __cpu_up(i);
    }

    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
    smp_cpus_done(max_cpus);

    percpu_free_unused_areas();

    initialise_gdb(); /* could be moved earlier */

    do_initcalls();

    schedulers_start();

    if ( opt_watchdog )
        watchdog_enable();

    shadow_mode_init();

    /* Initialise the access control security module. */
    acm_init(&initrdidx, mbi, initial_images_start);

    /* Create initial domain 0. */
    dom0 = domain_create(0, 0);
    if ( dom0 == NULL )
        panic("Error creating domain 0\n");

    set_bit(_DOMF_privileged, &dom0->domain_flags);
    /* The post-create hook sets the security label. */
    acm_post_domain0_create(dom0->domain_id);

    /* Grab the DOM0 command line. */
    cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
    if ( cmdline != NULL )
    {
        static char dom0_cmdline[MAX_GUEST_CMDLINE];

        /* Skip past the image name and copy to a local buffer. */
        while ( *cmdline == ' ' ) cmdline++;
        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
        {
            while ( *cmdline == ' ' ) cmdline++;
            strcpy(dom0_cmdline, cmdline);
        }

        cmdline = dom0_cmdline;

        /* Append any extra parameters. */
        if ( skip_ioapic_setup && !strstr(cmdline, "noapic") )
            strcat(cmdline, " noapic");
        if ( acpi_skip_timer_override &&
             !strstr(cmdline, "acpi_skip_timer_override") )
            strcat(cmdline, " acpi_skip_timer_override");
        if ( (strlen(acpi_param) != 0) && !strstr(cmdline, "acpi=") )
        {
            strcat(cmdline, " acpi=");
            strcat(cmdline, acpi_param);
        }
    }

    if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
    {
        _initrd_start = initial_images_start +
            (mod[initrdidx].mod_start - mod[0].mod_start);
        _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
    }

    /*
     * We're going to set up domain0 using the module(s) that we stashed
     * safely above our heap. The second module, if present, is an initrd
     * ramdisk.
     */
    if ( construct_dom0(dom0,
                        initial_images_start,
                        mod[0].mod_end-mod[0].mod_start,
                        _initrd_start,
                        _initrd_len,
                        cmdline) != 0 )
        panic("Could not set up DOM0 guest OS\n");

    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
    scrub_heap_pages();

    init_trace_bufs();

    /* Give up the VGA console if DOM0 is configured to grab it. */
    console_endboot(cmdline && strstr(cmdline, "tty0"));

    /* Hide UART from DOM0 if we're using it. */
    serial_endboot();

    domain_unpause_by_systemcontroller(dom0);

    startup_cpu_idle_loop();
}
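
/*
 * Build the space-separated capability strings for this hypervisor build
 * (e.g. "xen-<major>.<minor>-x86_64", plus "hvm-..." entries when hardware
 * virtualisation is enabled), as reported through the version hypercall.
 */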
void arch_get_xen_caps(xen_capabilities_info_t info)
{
    char *p = info;

#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)

    p += sprintf(p, "xen-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
    if ( hvm_enabled )
        p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);

#elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)

    p += sprintf(p, "xen-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
    if ( hvm_enabled )
    {
        p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
        //p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
    }

#elif defined(CONFIG_X86_64)

    p += sprintf(p, "xen-%d.%d-x86_64 ", XEN_VERSION, XEN_SUBVERSION);
    if ( hvm_enabled )
    {
        p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
        p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
        p += sprintf(p, "hvm-%d.%d-x86_64 ", XEN_VERSION, XEN_SUBVERSION);
    }

#else

    p++;

#endif

    *(p-1) = 0;

    BUG_ON((p - info) > sizeof(xen_capabilities_info_t));
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */