ia64/xen-unstable

view xen/arch/x86/setup.c @ 9800:7e72a5f6f886

Minor comment fix.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Apr 21 09:11:13 2006 +0100 (2006-04-21)
parents 4ce84cc55727
children 9b1c9d4133f8
line source
2 #include <xen/config.h>
3 #include <xen/init.h>
4 #include <xen/lib.h>
5 #include <xen/sched.h>
6 #include <xen/domain.h>
7 #include <xen/serial.h>
8 #include <xen/softirq.h>
9 #include <xen/acpi.h>
10 #include <xen/console.h>
11 #include <xen/serial.h>
12 #include <xen/trace.h>
13 #include <xen/multiboot.h>
14 #include <xen/domain_page.h>
15 #include <xen/compile.h>
16 #include <xen/gdbstub.h>
17 #include <public/version.h>
18 #include <asm/bitops.h>
19 #include <asm/smp.h>
20 #include <asm/processor.h>
21 #include <asm/mpspec.h>
22 #include <asm/apic.h>
23 #include <asm/desc.h>
24 #include <asm/shadow.h>
25 #include <asm/e820.h>
26 #include <acm/acm_hooks.h>
28 extern void dmi_scan_machine(void);
29 extern void generic_apic_probe(void);
31 /*
32 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
33 * page_info table and allocation bitmap.
34 */
35 static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
36 #if defined(CONFIG_X86_64)
37 integer_param("xenheap_megabytes", opt_xenheap_megabytes);
38 #endif
40 /* opt_nosmp: If true, secondary processors are ignored. */
41 static int opt_nosmp = 0;
42 boolean_param("nosmp", opt_nosmp);
44 /* maxcpus: maximum number of CPUs to activate. */
45 static unsigned int max_cpus = NR_CPUS;
46 integer_param("maxcpus", max_cpus);
48 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
49 static int opt_watchdog = 0;
50 boolean_param("watchdog", opt_watchdog);
52 /* **** Linux config option: propagated to domain0. */
53 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
54 /* "acpi=force": Override the disable blacklist. */
55 /* "acpi=strict": Disables out-of-spec workarounds. */
56 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
57 /* "acpi=noirq": Disables ACPI interrupt routing. */
58 static void parse_acpi_param(char *s);
59 custom_param("acpi", parse_acpi_param);
61 /* **** Linux config option: propagated to domain0. */
62 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
63 extern int acpi_skip_timer_override;
64 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
66 /* **** Linux config option: propagated to domain0. */
67 /* noapic: Disable IOAPIC setup. */
68 extern int skip_ioapic_setup;
69 boolean_param("noapic", skip_ioapic_setup);
71 int early_boot = 1;
73 cpumask_t cpu_present_map;
75 /* Limits of Xen heap, used to initialise the allocator. */
76 unsigned long xenheap_phys_start, xenheap_phys_end;
78 extern void arch_init_memory(void);
79 extern void init_IRQ(void);
80 extern void trap_init(void);
81 extern void early_time_init(void);
82 extern void initialize_keytable(void);
83 extern void early_cpu_init(void);
85 struct tss_struct init_tss[NR_CPUS];
87 struct vcpu *idle_vcpu[NR_CPUS];
89 extern unsigned long cpu0_stack[];
91 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
93 #if CONFIG_PAGING_LEVELS > 2
94 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
95 #else
96 unsigned long mmu_cr4_features = X86_CR4_PSE;
97 #endif
98 EXPORT_SYMBOL(mmu_cr4_features);
100 int acpi_disabled;
102 int acpi_force;
103 char acpi_param[10] = "";
104 static void parse_acpi_param(char *s)
105 {
106 /* Save the parameter so it can be propagated to domain0. */
107 strncpy(acpi_param, s, sizeof(acpi_param));
108 acpi_param[sizeof(acpi_param)-1] = '\0';
110 /* Interpret the parameter for use within Xen. */
111 if ( !strcmp(s, "off") )
112 {
113 disable_acpi();
114 }
115 else if ( !strcmp(s, "force") )
116 {
117 acpi_force = 1;
118 acpi_ht = 1;
119 acpi_disabled = 0;
120 }
121 else if ( !strcmp(s, "strict") )
122 {
123 acpi_strict = 1;
124 }
125 else if ( !strcmp(s, "ht") )
126 {
127 if ( !acpi_force )
128 disable_acpi();
129 acpi_ht = 1;
130 }
131 else if ( !strcmp(s, "noirq") )
132 {
133 acpi_noirq_set();
134 }
135 }
137 static void __init do_initcalls(void)
138 {
139 initcall_t *call;
140 for ( call = &__initcall_start; call < &__initcall_end; call++ )
141 (*call)();
142 }
144 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
146 static struct e820entry e820_raw[E820MAX];
148 static unsigned long initial_images_start, initial_images_end;
150 unsigned long initial_images_nrpages(void)
151 {
152 unsigned long s = initial_images_start + PAGE_SIZE - 1;
153 unsigned long e = initial_images_end;
154 return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
155 }
157 void discard_initial_images(void)
158 {
159 init_domheap_pages(initial_images_start, initial_images_end);
160 }
162 void __init __start_xen(multiboot_info_t *mbi)
163 {
164 char *cmdline;
165 struct domain *idle_domain;
166 unsigned long _initrd_start = 0, _initrd_len = 0;
167 unsigned int initrdidx = 1;
168 module_t *mod = (module_t *)__va(mbi->mods_addr);
169 unsigned long nr_pages, modules_length;
170 paddr_t s, e;
171 int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
172 struct ns16550_defaults ns16550 = {
173 .data_bits = 8,
174 .parity = 'n',
175 .stop_bits = 1
176 };
178 /* Parse the command-line options. */
179 if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
180 cmdline_parse(__va(mbi->cmdline));
182 set_current((struct vcpu *)0xfffff000); /* debug sanity */
183 set_processor_id(0); /* needed early, for smp_processor_id() */
185 smp_prepare_boot_cpu();
187 /* We initialise the serial devices very early so we can get debugging. */
188 ns16550.io_base = 0x3f8;
189 ns16550.irq = 4;
190 ns16550_init(0, &ns16550);
191 ns16550.io_base = 0x2f8;
192 ns16550.irq = 3;
193 ns16550_init(1, &ns16550);
194 serial_init_preirq();
196 init_console();
198 /* Check that we have at least one Multiboot module. */
199 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
200 {
201 printk("FATAL ERROR: dom0 kernel not specified."
202 " Check bootloader configuration.\n");
203 EARLY_FAIL();
204 }
206 if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
207 {
208 printk("FATAL ERROR: Misaligned CPU0 stack.\n");
209 EARLY_FAIL();
210 }
212 xenheap_phys_end = opt_xenheap_megabytes << 20;
214 if ( mbi->flags & MBI_MEMMAP )
215 {
216 while ( bytes < mbi->mmap_length )
217 {
218 memory_map_t *map = __va(mbi->mmap_addr + bytes);
220 /*
221 * This is a gross workaround for a BIOS bug. Some bootloaders do
222 * not write e820 map entries into pre-zeroed memory. This is
223 * okay if the BIOS fills in all fields of the map entry, but
224 * some broken BIOSes do not bother to write the high word of
225 * the length field if the length is smaller than 4GB. We
226 * detect and fix this by flagging sections below 4GB that
227 * appear to be larger than 4GB in size.
228 */
229 if ( (map->base_addr_high == 0) && (map->length_high != 0) )
230 {
231 e820_warn = 1;
232 map->length_high = 0;
233 }
235 e820_raw[e820_raw_nr].addr =
236 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
237 e820_raw[e820_raw_nr].size =
238 ((u64)map->length_high << 32) | (u64)map->length_low;
239 e820_raw[e820_raw_nr].type =
240 (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
241 e820_raw_nr++;
243 bytes += map->size + 4;
244 }
245 }
246 else if ( mbi->flags & MBI_MEMLIMITS )
247 {
248 e820_raw[0].addr = 0;
249 e820_raw[0].size = mbi->mem_lower << 10;
250 e820_raw[0].type = E820_RAM;
251 e820_raw[1].addr = 0x100000;
252 e820_raw[1].size = mbi->mem_upper << 10;
253 e820_raw[1].type = E820_RAM;
254 e820_raw_nr = 2;
255 }
256 else
257 {
258 printk("FATAL ERROR: Bootloader provided no memory information.\n");
259 for ( ; ; ) ;
260 }
262 if ( e820_warn )
263 printk("WARNING: Buggy e820 map detected and fixed "
264 "(truncated length fields).\n");
266 max_page = init_e820(e820_raw, &e820_raw_nr);
268 modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
270 /* Find a large enough RAM extent to stash the DOM0 modules. */
271 for ( i = 0; ; i++ )
272 {
273 if ( i == e820.nr_map )
274 {
275 printk("Not enough memory to stash the DOM0 kernel image.\n");
276 for ( ; ; ) ;
277 }
279 if ( (e820.map[i].type == E820_RAM) &&
280 (e820.map[i].size >= modules_length) &&
281 ((e820.map[i].addr + e820.map[i].size) >=
282 (xenheap_phys_end + modules_length)) )
283 break;
284 }
286 /* Stash as near as possible to the beginning of the RAM extent. */
287 initial_images_start = e820.map[i].addr;
288 if ( initial_images_start < xenheap_phys_end )
289 initial_images_start = xenheap_phys_end;
290 initial_images_end = initial_images_start + modules_length;
292 #if defined(CONFIG_X86_32)
293 memmove((void *)initial_images_start, /* use low mapping */
294 (void *)mod[0].mod_start, /* use low mapping */
295 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
296 #elif defined(CONFIG_X86_64)
297 memmove(__va(initial_images_start),
298 __va(mod[0].mod_start),
299 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
300 #endif
302 /* Initialise boot-time allocator with all RAM situated after modules. */
303 xenheap_phys_start = init_boot_allocator(__pa(&_end));
304 nr_pages = 0;
305 for ( i = 0; i < e820.nr_map; i++ )
306 {
307 if ( e820.map[i].type != E820_RAM )
308 continue;
310 nr_pages += e820.map[i].size >> PAGE_SHIFT;
312 /* Initialise boot heap, skipping Xen heap and dom0 modules. */
313 s = e820.map[i].addr;
314 e = s + e820.map[i].size;
315 if ( s < xenheap_phys_end )
316 s = xenheap_phys_end;
317 if ( (s < initial_images_end) && (e > initial_images_start) )
318 s = initial_images_end;
319 init_boot_pages(s, e);
321 #if defined (CONFIG_X86_64)
322 /*
323 * x86/64 maps all registered RAM. Points to note:
324 * 1. The initial pagetable already maps low 1GB, so skip that.
325 * 2. We must map *only* RAM areas, taking care to avoid I/O holes.
326 * Failure to do this can cause coherency problems and deadlocks
327 * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
328 */
329 {
330 /* Calculate page-frame range, discarding partial frames. */
331 unsigned long start, end;
332 unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
333 start = PFN_UP(e820.map[i].addr);
334 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
335 /* Clip the range to exclude what the bootstrapper initialised. */
336 if ( end < init_mapped )
337 continue;
338 if ( start < init_mapped )
339 start = init_mapped;
340 /* Request the mapping. */
341 map_pages_to_xen(
342 PAGE_OFFSET + (start << PAGE_SHIFT),
343 start, end-start, PAGE_HYPERVISOR);
344 }
345 #endif
346 }
348 memguard_init();
350 printk("System RAM: %luMB (%lukB)\n",
351 nr_pages >> (20 - PAGE_SHIFT),
352 nr_pages << (PAGE_SHIFT - 10));
353 total_pages = nr_pages;
355 /* Sanity check for unwanted bloat of dom0_op structure. */
356 BUG_ON(sizeof(((struct dom0_op *)0)->u) !=
357 sizeof(((struct dom0_op *)0)->u.pad));
359 BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
360 BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
361 BUG_ON(sizeof(vcpu_info_t) != 64);
363 /* __foo are defined in public headers. Check they match internal defs. */
364 BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
365 #ifdef HYPERVISOR_VIRT_END
366 BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
367 #endif
369 init_frametable();
371 end_boot_allocator();
373 /* Initialise the Xen heap, skipping RAM holes. */
374 nr_pages = 0;
375 for ( i = 0; i < e820.nr_map; i++ )
376 {
377 if ( e820.map[i].type != E820_RAM )
378 continue;
380 s = e820.map[i].addr;
381 e = s + e820.map[i].size;
382 if ( s < xenheap_phys_start )
383 s = xenheap_phys_start;
384 if ( e > xenheap_phys_end )
385 e = xenheap_phys_end;
387 if ( s < e )
388 {
389 nr_pages += (e - s) >> PAGE_SHIFT;
390 init_xenheap_pages(s, e);
391 }
392 }
394 printk("Xen heap: %luMB (%lukB)\n",
395 nr_pages >> (20 - PAGE_SHIFT),
396 nr_pages << (PAGE_SHIFT - 10));
398 early_boot = 0;
400 early_cpu_init();
402 scheduler_init();
404 idle_domain = domain_create(IDLE_DOMAIN_ID, 0);
405 BUG_ON(idle_domain == NULL);
407 set_current(idle_domain->vcpu[0]);
408 set_current_execstate(idle_domain->vcpu[0]);
409 idle_vcpu[0] = current;
411 paging_init();
413 /* Unmap the first page of CPU0's stack. */
414 memguard_guard_stack(cpu0_stack);
416 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
418 if ( opt_watchdog )
419 nmi_watchdog = NMI_LOCAL_APIC;
421 sort_exception_tables();
423 find_smp_config();
425 smp_alloc_memory();
427 dmi_scan_machine();
429 generic_apic_probe();
431 acpi_boot_table_init();
432 acpi_boot_init();
434 if ( smp_found_config )
435 get_smp_config();
437 init_apic_mappings();
439 init_IRQ();
441 trap_init();
443 timer_init();
445 early_time_init();
447 arch_init_memory();
449 identify_cpu(&boot_cpu_data);
450 if ( cpu_has_fxsr )
451 set_in_cr4(X86_CR4_OSFXSR);
452 if ( cpu_has_xmm )
453 set_in_cr4(X86_CR4_OSXMMEXCPT);
455 if ( opt_nosmp )
456 max_cpus = 0;
458 smp_prepare_cpus(max_cpus);
460 /*
461 * Initialise higher-level timer functions. We do this fairly late
462 * (post-SMP) because the time bases and scale factors need to be updated
463 * regularly, and SMP initialisation can cause a long delay with
464 * interrupts not yet enabled.
465 */
466 init_xen_time();
468 initialize_keytable();
470 serial_init_postirq();
472 BUG_ON(!local_irq_is_enabled());
474 for_each_present_cpu ( i )
475 {
476 if ( num_online_cpus() >= max_cpus )
477 break;
478 if ( !cpu_online(i) )
479 __cpu_up(i);
480 }
482 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
483 smp_cpus_done(max_cpus);
485 initialise_gdb(); /* could be moved earlier */
487 do_initcalls();
489 schedulers_start();
491 if ( opt_watchdog )
492 watchdog_enable();
494 shadow_mode_init();
496 /* initialize access control security module */
497 acm_init(&initrdidx, mbi, initial_images_start);
499 /* Create initial domain 0. */
500 dom0 = domain_create(0, 0);
501 if ( dom0 == NULL )
502 panic("Error creating domain 0\n");
504 set_bit(_DOMF_privileged, &dom0->domain_flags);
505 /* post-create hooks sets security label */
506 acm_post_domain0_create(dom0->domain_id);
508 /* Grab the DOM0 command line. */
509 cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
510 if ( cmdline != NULL )
511 {
512 static char dom0_cmdline[MAX_GUEST_CMDLINE];
514 /* Skip past the image name and copy to a local buffer. */
515 while ( *cmdline == ' ' ) cmdline++;
516 if ( (cmdline = strchr(cmdline, ' ')) != NULL )
517 {
518 while ( *cmdline == ' ' ) cmdline++;
519 strcpy(dom0_cmdline, cmdline);
520 }
522 cmdline = dom0_cmdline;
524 /* Append any extra parameters. */
525 if ( skip_ioapic_setup && !strstr(cmdline, "noapic") )
526 strcat(cmdline, " noapic");
527 if ( acpi_skip_timer_override &&
528 !strstr(cmdline, "acpi_skip_timer_override") )
529 strcat(cmdline, " acpi_skip_timer_override");
530 if ( (strlen(acpi_param) != 0) && !strstr(cmdline, "acpi=") )
531 {
532 strcat(cmdline, " acpi=");
533 strcat(cmdline, acpi_param);
534 }
535 }
537 if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
538 {
539 _initrd_start = initial_images_start +
540 (mod[initrdidx].mod_start - mod[0].mod_start);
541 _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
542 }
544 /*
545 * We're going to setup domain0 using the module(s) that we stashed safely
546 * above our heap. The second module, if present, is an initrd ramdisk.
547 */
548 if ( construct_dom0(dom0,
549 initial_images_start,
550 mod[0].mod_end-mod[0].mod_start,
551 _initrd_start,
552 _initrd_len,
553 cmdline) != 0)
554 panic("Could not set up DOM0 guest OS\n");
556 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
557 scrub_heap_pages();
559 init_trace_bufs();
561 /* Give up the VGA console if DOM0 is configured to grab it. */
562 console_endboot(cmdline && strstr(cmdline, "tty0"));
564 /* Hide UART from DOM0 if we're using it */
565 serial_endboot();
567 domain_unpause_by_systemcontroller(dom0);
569 startup_cpu_idle_loop();
570 }
572 void arch_get_xen_caps(xen_capabilities_info_t info)
573 {
574 char *p = info;
576 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
578 p += sprintf(p, "xen-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
579 if ( hvm_enabled )
580 p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
582 #elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
584 p += sprintf(p, "xen-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
585 if ( hvm_enabled )
586 {
587 p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
588 //p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
589 }
591 #elif defined(CONFIG_X86_64)
593 p += sprintf(p, "xen-%d.%d-x86_64 ", XEN_VERSION, XEN_SUBVERSION);
594 if ( hvm_enabled )
595 {
596 p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION);
597 //p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION);
598 p += sprintf(p, "hvm-%d.%d-x86_64 ", XEN_VERSION, XEN_SUBVERSION);
599 }
601 #else
603 p++;
605 #endif
607 *(p-1) = 0;
609 BUG_ON((p - info) > sizeof(xen_capabilities_info_t));
610 }
612 /*
613 * Local variables:
614 * mode: C
615 * c-set-style: "BSD"
616 * c-basic-offset: 4
617 * tab-width: 4
618 * indent-tabs-mode: nil
619 * End:
620 */