ia64/xen-unstable

view xen/arch/x86/setup.c @ 15796:2eb38cefdcd9

Skipping image name at start of command line is an
architecture-specific action. Definitely not required on IA64, for now
make it just x86 specific.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Aug 29 15:43:53 2007 +0100 (2007-08-29)
parents f2649861d594
children 1c19a3430ab1
line source
1 #include <xen/config.h>
2 #include <xen/init.h>
3 #include <xen/lib.h>
4 #include <xen/sched.h>
5 #include <xen/domain.h>
6 #include <xen/serial.h>
7 #include <xen/softirq.h>
8 #include <xen/acpi.h>
9 #include <xen/console.h>
10 #include <xen/serial.h>
11 #include <xen/trace.h>
12 #include <xen/multiboot.h>
13 #include <xen/domain_page.h>
14 #include <xen/version.h>
15 #include <xen/gdbstub.h>
16 #include <xen/percpu.h>
17 #include <xen/hypercall.h>
18 #include <xen/keyhandler.h>
19 #include <xen/numa.h>
20 #include <xen/rcupdate.h>
21 #include <xen/vga.h>
22 #include <public/version.h>
23 #ifdef CONFIG_COMPAT
24 #include <compat/platform.h>
25 #include <compat/xen.h>
26 #endif
27 #include <asm/bitops.h>
28 #include <asm/smp.h>
29 #include <asm/processor.h>
30 #include <asm/mpspec.h>
31 #include <asm/apic.h>
32 #include <asm/desc.h>
33 #include <asm/paging.h>
34 #include <asm/e820.h>
35 #include <acm/acm_hooks.h>
36 #include <xen/kexec.h>
37 #include <asm/edd.h>
39 #if defined(CONFIG_X86_64)
40 #define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
41 #define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
42 #else
43 #define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
44 #define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
45 #endif
47 extern void dmi_scan_machine(void);
48 extern void generic_apic_probe(void);
49 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
51 extern u16 boot_edid_caps;
52 extern u8 boot_edid_info[128];
53 extern struct boot_video_info boot_vid_info;
55 /*
56 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
57 * page_info table and allocation bitmap.
58 */
59 static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
60 #if defined(CONFIG_X86_64)
61 integer_param("xenheap_megabytes", opt_xenheap_megabytes);
62 #endif
64 /* opt_nosmp: If true, secondary processors are ignored. */
65 static int opt_nosmp = 0;
66 boolean_param("nosmp", opt_nosmp);
68 /* maxcpus: maximum number of CPUs to activate. */
69 static unsigned int max_cpus = NR_CPUS;
70 integer_param("maxcpus", max_cpus);
72 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
73 static int opt_watchdog = 0;
74 boolean_param("watchdog", opt_watchdog);
76 /* **** Linux config option: propagated to domain0. */
77 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
78 /* "acpi=force": Override the disable blacklist. */
79 /* "acpi=strict": Disables out-of-spec workarounds. */
80 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
81 /* "acpi=noirq": Disables ACPI interrupt routing. */
82 static void parse_acpi_param(char *s);
83 custom_param("acpi", parse_acpi_param);
85 /* **** Linux config option: propagated to domain0. */
86 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
87 extern int acpi_skip_timer_override;
88 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
90 /* **** Linux config option: propagated to domain0. */
91 /* noapic: Disable IOAPIC setup. */
92 extern int skip_ioapic_setup;
93 boolean_param("noapic", skip_ioapic_setup);
95 int early_boot = 1;
97 cpumask_t cpu_present_map;
99 unsigned long xen_phys_start;
101 /* Limits of Xen heap, used to initialise the allocator. */
102 unsigned long xenheap_phys_start, xenheap_phys_end;
104 extern void arch_init_memory(void);
105 extern void init_IRQ(void);
106 extern void trap_init(void);
107 extern void early_time_init(void);
108 extern void early_cpu_init(void);
109 extern void vesa_init(void);
110 extern void vesa_mtrr_init(void);
112 struct tss_struct init_tss[NR_CPUS];
114 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
116 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
118 #if CONFIG_PAGING_LEVELS > 2
119 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
120 #else
121 unsigned long mmu_cr4_features = X86_CR4_PSE;
122 #endif
123 EXPORT_SYMBOL(mmu_cr4_features);
125 int acpi_disabled;
127 int acpi_force;
128 char acpi_param[10] = "";
129 static void __init parse_acpi_param(char *s)
130 {
131 /* Save the parameter so it can be propagated to domain0. */
132 safe_strcpy(acpi_param, s);
134 /* Interpret the parameter for use within Xen. */
135 if ( !strcmp(s, "off") )
136 {
137 disable_acpi();
138 }
139 else if ( !strcmp(s, "force") )
140 {
141 acpi_force = 1;
142 acpi_ht = 1;
143 acpi_disabled = 0;
144 }
145 else if ( !strcmp(s, "strict") )
146 {
147 acpi_strict = 1;
148 }
149 else if ( !strcmp(s, "ht") )
150 {
151 if ( !acpi_force )
152 disable_acpi();
153 acpi_ht = 1;
154 }
155 else if ( !strcmp(s, "noirq") )
156 {
157 acpi_noirq_set();
158 }
159 }
161 static void __init do_initcalls(void)
162 {
163 initcall_t *call;
164 for ( call = &__initcall_start; call < &__initcall_end; call++ )
165 (*call)();
166 }
168 #define EARLY_FAIL(f, a...) do { \
169 printk( f , ## a ); \
170 for ( ; ; ) __asm__ __volatile__ ( "hlt" ); \
171 } while (0)
173 static unsigned long __initdata initial_images_start, initial_images_end;
175 unsigned long __init initial_images_nrpages(void)
176 {
177 ASSERT(!(initial_images_start & ~PAGE_MASK));
178 ASSERT(!(initial_images_end & ~PAGE_MASK));
179 return ((initial_images_end >> PAGE_SHIFT) -
180 (initial_images_start >> PAGE_SHIFT));
181 }
/*
 * Hand the memory holding the multiboot modules back to the domain heap,
 * once dom0 construction no longer needs them.
 */
void __init discard_initial_images(void)
{
    init_domheap_pages(initial_images_start, initial_images_end);
}
188 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
/*
 * Replicate CPU0's statically-initialised per-cpu data into the slots of
 * all other possible CPUs, then release or guard the slots of CPUs that
 * can never come online.
 */
static void __init percpu_init_areas(void)
{
    unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
    unsigned int first_unused;

    BUG_ON(data_size > PERCPU_SIZE);

    /* Initialise per-cpu data area for all possible secondary CPUs. */
    for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
        memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
               __per_cpu_start,
               data_size);
    first_unused = i;

    /* Check that there are no holes in cpu_possible_map. */
    for ( ; i < NR_CPUS; i++ )
        BUG_ON(cpu_possible(i));

#ifndef MEMORY_GUARD
    /* Not guarding: recycle the never-used slots into the xenheap. */
    init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
                       __pa(__per_cpu_end));
#endif
    /* Guard the unused slots so stray accesses fault. */
    memguard_guard_range(&__per_cpu_start[first_unused << PERCPU_SHIFT],
                         (NR_CPUS - first_unused) << PERCPU_SHIFT);
#if defined(CONFIG_X86_64)
    /* Also zap the mapping in the 1:1 area. */
    memguard_guard_range(__va(__pa(__per_cpu_start)) +
                         (first_unused << PERCPU_SHIFT),
                         (NR_CPUS - first_unused) << PERCPU_SHIFT);
#endif
}
222 /* Fetch acm policy module from multiboot modules. */
223 static void __init extract_acm_policy(
224 multiboot_info_t *mbi,
225 unsigned int *initrdidx,
226 char **_policy_start,
227 unsigned long *_policy_len)
228 {
229 int i;
230 module_t *mod = (module_t *)__va(mbi->mods_addr);
231 unsigned long start, policy_len;
232 char *policy_start;
234 /*
235 * Try all modules and see whichever could be the binary policy.
236 * Adjust the initrdidx if module[1] is the binary policy.
237 */
238 for ( i = mbi->mods_count-1; i >= 1; i-- )
239 {
240 start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
241 policy_start = maddr_to_bootstrap_virt(start);
242 policy_len = mod[i].mod_end - mod[i].mod_start;
243 if ( acm_is_policy(policy_start, policy_len) )
244 {
245 printk("Policy len 0x%lx, start at %p - module %d.\n",
246 policy_len, policy_start, i);
247 *_policy_start = policy_start;
248 *_policy_len = policy_len;
249 if ( i == 1 )
250 *initrdidx = (mbi->mods_count > 2) ? 2 : 0;
251 break;
252 }
253 }
254 }
/* Create the idle domain and install its vcpu0 as the current vcpu. */
static void __init init_idle_domain(void)
{
    struct domain *idle_domain;

    /* Domain creation requires that scheduler structures are initialised. */
    scheduler_init();

    idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
    if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
        BUG();

    set_current(idle_domain->vcpu[0]);
    idle_vcpu[0] = this_cpu(curr_vcpu) = current;

    setup_idle_pagetable();
}
273 static void __init srat_detect_node(int cpu)
274 {
275 unsigned node;
276 u8 apicid = x86_cpu_to_apicid[cpu];
278 node = apicid_to_node[apicid];
279 if ( node == NUMA_NO_NODE )
280 node = 0;
281 numa_set_node(cpu, node);
283 if ( acpi_numa > 0 )
284 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
285 }
287 /*
288 * Ensure a given physical memory range is present in the bootstrap mappings.
289 * Use superpage mappings to ensure that pagetable memory needn't be allocated.
290 */
291 static void __init bootstrap_map(unsigned long start, unsigned long end)
292 {
293 unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
294 start = start & ~mask;
295 end = (end + mask) & ~mask;
296 if ( end > BOOTSTRAP_DIRECTMAP_END )
297 panic("Cannot access memory beyond end of "
298 "bootstrap direct-map area\n");
299 map_pages_to_xen(
300 (unsigned long)maddr_to_bootstrap_virt(start),
301 start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
302 }
/*
 * Copy [src_start, src_end) to physical address dst.  Both ranges are
 * first forced into the bootstrap direct map; memmove() is used because
 * source and destination may overlap.
 */
static void __init move_memory(
    unsigned long dst, unsigned long src_start, unsigned long src_end)
{
    bootstrap_map(src_start, src_end);
    bootstrap_map(dst, dst + src_end - src_start);
    memmove(maddr_to_bootstrap_virt(dst),
            maddr_to_bootstrap_virt(src_start),
            src_end - src_start);
}
314 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
315 static struct e820map __initdata boot_e820;
/*
 * Reserve area (@s,@e) in the temporary bootstrap e820 map.  Returns 1 if
 * the range was wholly contained in a single region (which is then split
 * around it), 0 if no containing region was found.
 */
static int __init reserve_in_boot_e820(unsigned long s, unsigned long e)
{
    uint64_t rs, re;
    int i;

    for ( i = 0; i < boot_e820.nr_map; i++ )
    {
        /* Have we found the e820 region that includes the specified range? */
        rs = boot_e820.map[i].addr;
        re = rs + boot_e820.map[i].size;
        if ( (s >= rs) && (e <= re) )
            goto found;
    }

    return 0;

 found:
    /* Start fragment: shrink the region to end at @s (size 0 if s == rs). */
    boot_e820.map[i].size = s - rs;

    /*
     * End fragment: insert a new entry covering [e,re), if non-empty.
     * NOTE(review): no capacity check before growing the map here —
     * presumably nr_map stays safely below the array bound during boot;
     * confirm against struct e820map's map[] size.
     */
    if ( e < re )
    {
        memmove(&boot_e820.map[i+1], &boot_e820.map[i],
                (boot_e820.nr_map-i) * sizeof(boot_e820.map[0]));
        boot_e820.nr_map++;
        i++;
        boot_e820.map[i].addr = e;
        boot_e820.map[i].size = re - e;
    }

    return 1;
}
/*
 * Screen/video state handed over from the real-mode boot code (read via
 * bootsym(boot_vid_info)).  The hex comments give each field's byte
 * offset.  NOTE(review): the layout presumably mirrors what the boot
 * trampoline assembly fills in — confirm offsets against that code.
 */
struct boot_video_info {
    u8  orig_x;             /* 0x00 */
    u8  orig_y;             /* 0x01 */
    u8  orig_video_mode;    /* 0x02 */
    u8  orig_video_cols;    /* 0x03 */
    u8  orig_video_lines;   /* 0x04 */
    u8  orig_video_isVGA;   /* 0x05 */
    u16 orig_video_points;  /* 0x06 */

    /* VESA graphic mode -- linear frame buffer */
    u32 capabilities;       /* 0x08 */
    u16 lfb_linelength;     /* 0x0c */
    u16 lfb_width;          /* 0x0e */
    u16 lfb_height;         /* 0x10 */
    u16 lfb_depth;          /* 0x12 */
    u32 lfb_base;           /* 0x14 */
    u32 lfb_size;           /* 0x18 */
    u8  red_size;           /* 0x1c */
    u8  red_pos;            /* 0x1d */
    u8  green_size;         /* 0x1e */
    u8  green_pos;          /* 0x1f */
    u8  blue_size;          /* 0x20 */
    u8  blue_pos;           /* 0x21 */
    u8  rsvd_size;          /* 0x22 */
    u8  rsvd_pos;           /* 0x23 */
    u16 vesapm_seg;         /* 0x24 */
    u16 vesapm_off;         /* 0x26 */
};
/*
 * Translate the trampoline-supplied boot_vid_info into vga_console_info.
 * Two encodings are recognised: isVGA == 1 with BIOS mode 3 (colour text)
 * and isVGA == 0x23 (VESA linear framebuffer).  Any other combination
 * leaves vga_console_info unmodified.
 */
static void __init parse_video_info(void)
{
    struct boot_video_info *bvi = &bootsym(boot_vid_info);

    if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) )
    {
        vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
        vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points;
        vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x;
        vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y;
        vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines;
        vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols;
    }
    else if ( bvi->orig_video_isVGA == 0x23 )
    {
        vga_console_info.video_type = XEN_VGATYPE_VESA_LFB;
        vga_console_info.u.vesa_lfb.width = bvi->lfb_width;
        vga_console_info.u.vesa_lfb.height = bvi->lfb_height;
        vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength;
        vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth;
        vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base;
        vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size;
        vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos;
        vga_console_info.u.vesa_lfb.red_size = bvi->red_size;
        vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos;
        vga_console_info.u.vesa_lfb.green_size = bvi->green_size;
        vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos;
        vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
        vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
        vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
    }
}
/*
 * Final boot step: release (or guard) the .init sections and drop into
 * the idle loop.  Never returns.
 */
void init_done(void)
{
    extern char __init_begin[], __init_end[];

    /* Free (or page-protect) the init areas. */
#ifndef MEMORY_GUARD
    init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
#endif
    memguard_guard_range(__init_begin, __init_end - __init_begin);
#if defined(CONFIG_X86_64)
    /* Also zap the mapping in the 1:1 area. */
    memguard_guard_range(__va(__pa(__init_begin)), __init_end - __init_begin);
#endif
    printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);

    startup_cpu_idle_loop();
}
432 static char * __init cmdline_cook(char *p)
433 {
434 p = p ? : "";
435 while ( *p == ' ' )
436 p++;
437 while ( (*p != ' ') && (*p != '\0') )
438 p++;
439 while ( *p == ' ' )
440 p++;
441 return p;
442 }
/*
 * C entry point from the boot trampoline.  @mbi_p is the physical address
 * of the multiboot information structure.  Parses the command line, builds
 * the memory map, relocates Xen (x86/64) and the boot modules, brings up
 * every subsystem in boot order, constructs dom0, and finally jumps to
 * init_done().  Never returns.
 *
 * NOTE(review): brace-only lines near the end of this function were lost
 * in the extraction of this file and have been restored to match the
 * surrounding control flow — confirm against the upstream changeset.
 */
void __init __start_xen(unsigned long mbi_p)
{
    char *memmap_type = NULL;
    char *cmdline, *kextra;
    unsigned long _initrd_start = 0, _initrd_len = 0;
    unsigned int initrdidx = 1;
    char *_policy_start = NULL;
    unsigned long _policy_len = 0;
    multiboot_info_t *mbi = __va(mbi_p);
    module_t *mod = (module_t *)__va(mbi->mods_addr);
    unsigned long nr_pages, modules_length;
    int i, e820_warn = 0, bytes = 0;
    struct ns16550_defaults ns16550 = {
        .data_bits = 8,
        .parity    = 'n',
        .stop_bits = 1
    };

    /* Install a minimal #PF handler until trap_init() runs. */
    extern void early_page_fault(void);
    set_intr_gate(TRAP_page_fault, &early_page_fault);

    /* Parse the command-line options. */
    cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ?
                           __va(mbi->cmdline) : NULL);
    if ( (kextra = strstr(cmdline, " -- ")) != NULL )
    {
        /*
         * Options after ' -- ' separator belong to dom0.
         *  1. Orphan dom0's options from Xen's command line.
         *  2. Skip all but final leading space from dom0's options.
         */
        *kextra = '\0';
        kextra += 3;
        while ( kextra[1] == ' ' ) kextra++;
    }
    cmdline_parse(cmdline);

    parse_video_info();

    set_current((struct vcpu *)0xfffff000); /* debug sanity */
    idle_vcpu[0] = current;
    set_processor_id(0); /* needed early, for smp_processor_id() */

    smp_prepare_boot_cpu();

    /* We initialise the serial devices very early so we can get debugging. */
    ns16550.io_base = 0x3f8;
    ns16550.irq = 4;
    ns16550_init(0, &ns16550);
    ns16550.io_base = 0x2f8;
    ns16550.irq = 3;
    ns16550_init(1, &ns16550);
    serial_init_preirq();

    init_console();

    printk("Command line: %s\n", cmdline);

    printk("Video information:\n");

    /* Print VGA display mode information. */
    switch ( vga_console_info.video_type )
    {
    case XEN_VGATYPE_TEXT_MODE_3:
        printk(" VGA is text mode %dx%d, font 8x%d\n",
               vga_console_info.u.text_mode_3.columns,
               vga_console_info.u.text_mode_3.rows,
               vga_console_info.u.text_mode_3.font_height);
        break;
    case XEN_VGATYPE_VESA_LFB:
        printk(" VGA is graphics mode %dx%d, %d bpp\n",
               vga_console_info.u.vesa_lfb.width,
               vga_console_info.u.vesa_lfb.height,
               vga_console_info.u.vesa_lfb.bits_per_pixel);
        break;
    default:
        printk(" No VGA detected\n");
        break;
    }

    /* Print VBE/DDC EDID information. */
    if ( bootsym(boot_edid_caps) != 0x1313 )
    {
        u16 caps = bootsym(boot_edid_caps);
        printk(" VBE/DDC methods:%s%s%s; ",
               (caps & 1) ? " V1" : "",
               (caps & 2) ? " V2" : "",
               !(caps & 3) ? " none" : "");
        printk("EDID transfer time: %d seconds\n", caps >> 8);
        if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
        {
            printk(" EDID info not retrieved because ");
            if ( !(caps & 3) )
                printk("no DDC retrieval method detected\n");
            else if ( (caps >> 8) > 5 )
                printk("takes longer than 5 seconds\n");
            else
                printk("of reasons unknown\n");
        }
    }

    printk("Disc information:\n");
    printk(" Found %d MBR signatures\n",
           bootsym(boot_mbr_signature_nr));
    printk(" Found %d EDD information structures\n",
           bootsym(boot_edd_info_nr));

    /* Check that we have at least one Multiboot module. */
    if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
        EARLY_FAIL("dom0 kernel not specified. "
                   "Check bootloader configuration.\n");

    if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
        EARLY_FAIL("Misaligned CPU0 stack.\n");

    /*
     * Since there are some stubs getting built on the stacks which use
     * direct calls/jumps, the heap must be confined to the lower 2G so
     * that those branches can reach their targets.
     */
    if ( opt_xenheap_megabytes > 2048 )
        opt_xenheap_megabytes = 2048;

    /* Pick the best available source of memory-map information. */
    if ( e820_raw_nr != 0 )
    {
        memmap_type = "Xen-e820";
    }
    else if ( bootsym(lowmem_kb) )
    {
        memmap_type = "Xen-e801";
        e820_raw[0].addr = 0;
        e820_raw[0].size = bootsym(lowmem_kb) << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = bootsym(highmem_kb) << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else if ( mbi->flags & MBI_MEMMAP )
    {
        memmap_type = "Multiboot-e820";
        while ( bytes < mbi->mmap_length )
        {
            memory_map_t *map = __va(mbi->mmap_addr + bytes);

            /*
             * This is a gross workaround for a BIOS bug. Some bootloaders do
             * not write e820 map entries into pre-zeroed memory. This is
             * okay if the BIOS fills in all fields of the map entry, but
             * some broken BIOSes do not bother to write the high word of
             * the length field if the length is smaller than 4GB. We
             * detect and fix this by flagging sections below 4GB that
             * appear to be larger than 4GB in size.
             */
            if ( (map->base_addr_high == 0) && (map->length_high != 0) )
            {
                if ( !e820_warn )
                {
                    printk("WARNING: Buggy e820 map detected and fixed "
                           "(truncated length fields).\n");
                    e820_warn = 1;
                }
                map->length_high = 0;
            }

            e820_raw[e820_raw_nr].addr =
                ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
            e820_raw[e820_raw_nr].size =
                ((u64)map->length_high << 32) | (u64)map->length_low;
            e820_raw[e820_raw_nr].type =
                (map->type > E820_NVS) ? E820_RESERVED : map->type;
            e820_raw_nr++;

            bytes += map->size + 4;
        }
    }
    else if ( mbi->flags & MBI_MEMLIMITS )
    {
        memmap_type = "Multiboot-e801";
        e820_raw[0].addr = 0;
        e820_raw[0].size = mbi->mem_lower << 10;
        e820_raw[0].type = E820_RAM;
        e820_raw[1].addr = 0x100000;
        e820_raw[1].size = mbi->mem_upper << 10;
        e820_raw[1].type = E820_RAM;
        e820_raw_nr = 2;
    }
    else
    {
        EARLY_FAIL("Bootloader provided no memory information.\n");
    }

    /* Ensure that all E820 RAM regions are page-aligned and -sized. */
    for ( i = 0; i < e820_raw_nr; i++ )
    {
        uint64_t s, e;

        if ( e820_raw[i].type != E820_RAM )
            continue;
        s = PFN_UP(e820_raw[i].addr);
        e = PFN_DOWN(e820_raw[i].addr + e820_raw[i].size);
        e820_raw[i].size = 0; /* discarded later */
        if ( s < e )
        {
            e820_raw[i].addr = s << PAGE_SHIFT;
            e820_raw[i].size = (e - s) << PAGE_SHIFT;
        }
    }

    /* Sanitise the raw E820 map to produce a final clean version. */
    max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);

    /*
     * Create a temporary copy of the E820 map. Truncate it to above 16MB
     * as anything below that is already mapped and has a statically-allocated
     * purpose.
     */
    memcpy(&boot_e820, &e820, sizeof(e820));
    for ( i = 0; i < boot_e820.nr_map; i++ )
    {
        uint64_t s, e, min = 16 << 20; /* 16MB */
        s = boot_e820.map[i].addr;
        e = boot_e820.map[i].addr + boot_e820.map[i].size;
        if ( s >= min )
            continue;
        if ( e > min )
        {
            boot_e820.map[i].addr = min;
            boot_e820.map[i].size = e - min;
        }
        else
            boot_e820.map[i].type = E820_RESERVED;
    }

    /*
     * Iterate backwards over all superpage-aligned RAM regions.
     *
     * We require superpage alignment because the boot allocator is not yet
     * initialised. Hence we can only map superpages in the address range
     * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
     * dynamic allocation of pagetables.
     *
     * As well as mapping superpages in that range, in preparation for
     * initialising the boot allocator, we also look for a region to which
     * we can relocate the dom0 kernel and other multiboot modules. Also, on
     * x86/64, we relocate Xen to higher memory.
     */
    modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
    for ( i = boot_e820.nr_map-1; i >= 0; i-- )
    {
        uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;

        /* Superpage-aligned chunks up to BOOTSTRAP_DIRECTMAP_END, please. */
        s = (boot_e820.map[i].addr + mask) & ~mask;
        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
        e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
            continue;

        /* Map the chunk. No memory will need to be allocated to do this. */
        map_pages_to_xen(
            (unsigned long)maddr_to_bootstrap_virt(s),
            s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);

#if defined(CONFIG_X86_64)
        /* Is the region suitable for relocating Xen? */
        if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
        {
            extern l2_pgentry_t l2_xenmap[];
            l4_pgentry_t *pl4e;
            l3_pgentry_t *pl3e;
            l2_pgentry_t *pl2e;
            int i, j;

            /* Select relocation address. */
            e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
            xen_phys_start = e;
            bootsym(trampoline_xen_phys_start) = e;

            /*
             * Perform relocation to new physical address.
             * Before doing so we must sync static/global data with main memory
             * with a barrier(). After this we must *not* modify static/global
             * data until after we have switched to the relocated pagetables!
             */
            barrier();
            move_memory(e, 0, __pa(&_end) - xen_phys_start);

            /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
            memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);

            /* Walk initial pagetables, relocating page directory entries. */
            pl4e = __va(__pa(idle_pg_table));
            for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
            {
                if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
                    continue;
                *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
                                        xen_phys_start);
                pl3e = l4e_to_l3e(*pl4e);
                for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
                {
                    /* Not present or already relocated? */
                    if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
                         (l3e_get_pfn(*pl3e) > 0x1000) )
                        continue;
                    *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
                                            xen_phys_start);
                }
            }

            /* The only data mappings to be relocated are in the Xen area. */
            pl2e = __va(__pa(l2_xenmap));
            for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
            {
                if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
                    continue;
                *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
                                        xen_phys_start);
            }

            /* Re-sync the stack and then switch to relocated pagetables. */
            asm volatile (
                "rep movsb ; " /* re-sync the stack */
                "movq %%cr4,%%rsi ; "
                "andb $0x7f,%%sil ; "
                "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
                "movq %0,%%cr3 ; " /* CR3 == new pagetables */
                "orb $0x80,%%sil ; "
                "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
                : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
                "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
        }
#endif

        /* Is the region suitable for relocating the multiboot modules? */
        if ( !initial_images_start && (s < e) && ((e-s) >= modules_length) )
        {
            initial_images_end = e;
            e = (e - modules_length) & PAGE_MASK;
            initial_images_start = e;
            move_memory(initial_images_start,
                        mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
        }

        /* Claim space for the kexec crash area if not yet placed. */
        if ( !kexec_crash_area.start && (s < e) &&
             ((e-s) >= kexec_crash_area.size) )
        {
            e = (e - kexec_crash_area.size) & PAGE_MASK;
            kexec_crash_area.start = e;
        }
    }

    if ( !initial_images_start )
        EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
    reserve_in_boot_e820(initial_images_start, initial_images_end);

    /*
     * With modules (and Xen itself, on x86/64) relocated out of the way, we
     * can now initialise the boot allocator with some memory.
     */
    xenheap_phys_start = init_boot_allocator(__pa(&_end));
    xenheap_phys_end = opt_xenheap_megabytes << 20;
#if defined(CONFIG_X86_64)
    if ( !xen_phys_start )
        EARLY_FAIL("Not enough memory to relocate Xen.\n");
    xenheap_phys_end += xen_phys_start;
    reserve_in_boot_e820(xen_phys_start,
                         xen_phys_start + (opt_xenheap_megabytes<<20));
    init_boot_pages(1<<20, 16<<20); /* Initial seed: 15MB */
#else
    init_boot_pages(xenheap_phys_end, 16<<20); /* Initial seed: 4MB */
#endif

    if ( kexec_crash_area.size != 0 )
    {
        unsigned long kdump_start = kexec_crash_area.start;
        unsigned long kdump_size = kexec_crash_area.size;

        kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;

        if ( !reserve_in_boot_e820(kdump_start, kdump_size) )
        {
            printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)"
                   "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
            kexec_crash_area.start = kexec_crash_area.size = 0;
        }
        else
        {
            printk("Kdump: %luMB (%lukB) at 0x%lx\n",
                   kdump_size >> 20, kdump_size >> 10, kdump_start);
        }
    }

    /*
     * With the boot allocator now seeded, we can walk every RAM region and
     * map it in its entirety (on x86/64, at least) and notify it to the
     * boot allocator.
     */
    for ( i = 0; i < boot_e820.nr_map; i++ )
    {
        uint64_t s, e, map_e, mask = PAGE_SIZE - 1;

        /* Only page alignment required now. */
        s = (boot_e820.map[i].addr + mask) & ~mask;
        e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
        if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
            continue;

        /* Perform the mapping (truncated in 32-bit mode). */
        map_e = e;
#if defined(CONFIG_X86_32)
        map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
#endif
        if ( s < map_e )
            map_pages_to_xen(
                (unsigned long)maddr_to_bootstrap_virt(s),
                s >> PAGE_SHIFT, (map_e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);

        init_boot_pages(s, e);
    }

    memguard_init();

    nr_pages = 0;
    for ( i = 0; i < e820.nr_map; i++ )
        if ( e820.map[i].type == E820_RAM )
            nr_pages += e820.map[i].size >> PAGE_SHIFT;
    printk("System RAM: %luMB (%lukB)\n",
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));
    total_pages = nr_pages;

    /* Sanity check for unwanted bloat of certain hypercall structures. */
    BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
                 sizeof(((struct xen_platform_op *)0)->u.pad));
    BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
                 sizeof(((struct xen_domctl *)0)->u.pad));
    BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
                 sizeof(((struct xen_sysctl *)0)->u.pad));

    BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(struct vcpu_info) != 64);

#ifdef CONFIG_COMPAT
    BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) !=
                 sizeof(((struct compat_platform_op *)0)->u.pad));
    BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE);
    BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64);
#endif

    /* Check definitions in public headers match internal defs. */
    BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
#ifdef HYPERVISOR_VIRT_END
    BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
#endif
    BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
    BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);

    init_frametable();

    acpi_boot_table_init();

    acpi_numa_init();

    numa_initmem_init(0, max_page);

    /* Initialise the Xen heap, skipping RAM holes. */
    init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
    nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
#ifdef __x86_64__
    /* On x86/64 the gap below the relocated image also joins the heap. */
    init_xenheap_pages(xen_phys_start, __pa(&_start));
    nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
    vesa_init();
#endif
    xenheap_phys_start = xen_phys_start;
    printk("Xen heap: %luMB (%lukB)\n",
           nr_pages >> (20 - PAGE_SHIFT),
           nr_pages << (PAGE_SHIFT - 10));

    end_boot_allocator();

    early_boot = 0;

    early_cpu_init();

    paging_init();

    /* Unmap the first page of CPU0's stack. */
    memguard_guard_stack(cpu0_stack);

    open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);

    if ( opt_watchdog )
        nmi_watchdog = NMI_LOCAL_APIC;

    sort_exception_tables();

    find_smp_config();

    dmi_scan_machine();

    generic_apic_probe();

    acpi_boot_init();

    init_cpu_to_node();

    if ( smp_found_config )
        get_smp_config();

#ifdef CONFIG_X86_64
    /* Low mappings were only needed for some BIOS table parsing. */
    zap_low_mappings();
#endif

    init_apic_mappings();

    init_IRQ();

    percpu_init_areas();

    init_idle_domain();

    trap_init();

    rcu_init();

    timer_init();

    early_time_init();

    arch_init_memory();

    identify_cpu(&boot_cpu_data);
    if ( cpu_has_fxsr )
        set_in_cr4(X86_CR4_OSFXSR);
    if ( cpu_has_xmm )
        set_in_cr4(X86_CR4_OSXMMEXCPT);
#ifdef CONFIG_X86_64
    vesa_mtrr_init();
#endif

    if ( opt_nosmp )
        max_cpus = 0;

    smp_prepare_cpus(max_cpus);

    /*
     * Initialise higher-level timer functions. We do this fairly late
     * (post-SMP) because the time bases and scale factors need to be updated
     * regularly, and SMP initialisation can cause a long delay with
     * interrupts not yet enabled.
     */
    init_xen_time();

    initialize_keytable();

    serial_init_postirq();

    BUG_ON(!local_irq_is_enabled());

    /* Bring up the remaining CPUs, honouring the "maxcpus=" limit. */
    for_each_present_cpu ( i )
    {
        if ( num_online_cpus() >= max_cpus )
            break;
        if ( !cpu_online(i) )
        {
            rcu_online_cpu(i);
            __cpu_up(i);
        }

        /* Set up cpu_to_node[]. */
        srat_detect_node(i);
        /* Set up node_to_cpumask based on cpu_to_node[]. */
        numa_add_cpu(i);
    }

    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
    smp_cpus_done(max_cpus);

    initialise_gdb(); /* could be moved earlier */

    do_initcalls();

    if ( opt_watchdog )
        watchdog_enable();

    /* Extract policy from multiboot. */
    extract_acm_policy(mbi, &initrdidx, &_policy_start, &_policy_len);

    /* initialize access control security module */
    acm_init(_policy_start, _policy_len);

    /* Create initial domain 0. */
    dom0 = domain_create(0, 0, DOM0_SSIDREF);
    if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
        panic("Error creating domain 0\n");

    dom0->is_privileged = 1;

    /* Grab the DOM0 command line. */
    cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
    if ( (cmdline != NULL) || (kextra != NULL) )
    {
        static char dom0_cmdline[MAX_GUEST_CMDLINE];

        cmdline = cmdline_cook(cmdline);
        safe_strcpy(dom0_cmdline, cmdline);

        if ( kextra != NULL )
            /* kextra always includes exactly one leading space. */
            safe_strcat(dom0_cmdline, kextra);

        /* Append any extra parameters. */
        if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
            safe_strcat(dom0_cmdline, " noapic");
        if ( acpi_skip_timer_override &&
             !strstr(dom0_cmdline, "acpi_skip_timer_override") )
            safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
        if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
        {
            safe_strcat(dom0_cmdline, " acpi=");
            safe_strcat(dom0_cmdline, acpi_param);
        }

        cmdline = dom0_cmdline;
    }

    if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
    {
        _initrd_start = initial_images_start +
            (mod[initrdidx].mod_start - mod[0].mod_start);
        _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
    }

    /*
     * We're going to setup domain0 using the module(s) that we stashed safely
     * above our heap. The second module, if present, is an initrd ramdisk.
     */
    if ( construct_dom0(dom0,
                        initial_images_start,
                        mod[0].mod_end-mod[0].mod_start,
                        _initrd_start,
                        _initrd_len,
                        cmdline) != 0)
        panic("Could not set up DOM0 guest OS\n");

    /* Scrub RAM that is still free and so may go to an unprivileged domain. */
    scrub_heap_pages();

    init_trace_bufs();

    console_endboot();

    /* Hide UART from DOM0 if we're using it */
    serial_endboot();

    domain_unpause_by_systemcontroller(dom0);

    /* Switch to the idle stack and run init_done(); never returns. */
    reset_stack_and_jump(init_done);
}
/*
 * Fill @info with the space-separated list of guest interface flavours
 * this hypervisor build can host (PV always; HVM only if hvm_enabled).
 * NOTE(review): brace-only lines were lost in the extraction of this
 * file and have been restored per the surrounding control flow.
 */
void arch_get_xen_caps(xen_capabilities_info_t *info)
{
    /* Interface name is always xen-3.0-* for Xen-3.x. */
    int major = 3, minor = 0;
    char s[32];

    (*info)[0] = '\0';

#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)

    snprintf(s, sizeof(s), "xen-%d.%d-x86_32 ", major, minor);
    safe_strcat(*info, s);
    if ( hvm_enabled )
    {
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
        safe_strcat(*info, s);
    }

#elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)

    snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
    safe_strcat(*info, s);
    if ( hvm_enabled )
    {
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
        safe_strcat(*info, s);
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
        safe_strcat(*info, s);
    }

#elif defined(CONFIG_X86_64)

    snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor);
    safe_strcat(*info, s);
#ifdef CONFIG_COMPAT
    snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
    safe_strcat(*info, s);
#endif
    if ( hvm_enabled )
    {
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
        safe_strcat(*info, s);
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
        safe_strcat(*info, s);
        snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor);
        safe_strcat(*info, s);
    }

#endif
}
1159 /*
1160 * Local variables:
1161 * mode: C
1162 * c-set-style: "BSD"
1163 * c-basic-offset: 4
1164 * tab-width: 4
1165 * indent-tabs-mode: nil
1166 * End:
1167 */