ia64/xen-unstable

annotate xen/arch/x86/setup.c @ 15815:96f64f4c42f0

Xen Security Modules: XSM
Signed-off-by: George Coker <gscoker@alpha.ncsc.mil>
author kfraser@localhost.localdomain
date Fri Aug 31 11:21:35 2007 +0100 (2007-08-31)
parents 1c19a3430ab1
children fa4d44c9d9f6
rev   line source
kaf24@1452 1 #include <xen/config.h>
kaf24@1452 2 #include <xen/init.h>
kaf24@1452 3 #include <xen/lib.h>
kaf24@1452 4 #include <xen/sched.h>
cl349@5247 5 #include <xen/domain.h>
kaf24@1452 6 #include <xen/serial.h>
kaf24@1506 7 #include <xen/softirq.h>
kaf24@1452 8 #include <xen/acpi.h>
kaf24@3338 9 #include <xen/console.h>
iap10@4287 10 #include <xen/serial.h>
kaf24@3338 11 #include <xen/trace.h>
kaf24@3338 12 #include <xen/multiboot.h>
kaf24@5356 13 #include <xen/domain_page.h>
kfraser@10890 14 #include <xen/version.h>
kaf24@9117 15 #include <xen/gdbstub.h>
kaf24@9818 16 #include <xen/percpu.h>
kfraser@11296 17 #include <xen/hypercall.h>
kfraser@11601 18 #include <xen/keyhandler.h>
kfraser@11971 19 #include <xen/numa.h>
kaf24@13662 20 #include <xen/rcupdate.h>
keir@15298 21 #include <xen/vga.h>
iap10@6721 22 #include <public/version.h>
ack@13291 23 #ifdef CONFIG_COMPAT
ack@13291 24 #include <compat/platform.h>
ack@13291 25 #include <compat/xen.h>
ack@13291 26 #endif
kaf24@1452 27 #include <asm/bitops.h>
kaf24@1452 28 #include <asm/smp.h>
kaf24@1452 29 #include <asm/processor.h>
kaf24@1452 30 #include <asm/mpspec.h>
kaf24@1452 31 #include <asm/apic.h>
kaf24@1452 32 #include <asm/desc.h>
Tim@13909 33 #include <asm/paging.h>
kaf24@3344 34 #include <asm/e820.h>
kaf24@5536 35 #include <acm/acm_hooks.h>
ian@12677 36 #include <xen/kexec.h>
kfraser@15336 37 #include <asm/edd.h>
kfraser@15815 38 #include <xsm/xsm.h>
kaf24@3338 39
kfraser@15074 40 #if defined(CONFIG_X86_64)
kfraser@15597 41 #define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
kfraser@15074 42 #define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
kfraser@15074 43 #else
kfraser@15597 44 #define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
kfraser@15074 45 #define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
kfraser@15074 46 #endif
kfraser@15074 47
kaf24@5157 48 extern void dmi_scan_machine(void);
kaf24@5211 49 extern void generic_apic_probe(void);
kfraser@11971 50 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
kaf24@5157 51
kfraser@15330 52 extern u16 boot_edid_caps;
kfraser@15330 53 extern u8 boot_edid_info[128];
kfraser@15330 54 extern struct boot_video_info boot_vid_info;
kfraser@15330 55
kaf24@3338 56 /*
kaf24@3338 57 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
kaf24@8726 58 * page_info table and allocation bitmap.
kaf24@3338 59 */
kaf24@3338 60 static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
kaf24@4950 61 #if defined(CONFIG_X86_64)
kaf24@3338 62 integer_param("xenheap_megabytes", opt_xenheap_megabytes);
kaf24@3354 63 #endif
kaf24@1452 64
kaf24@5146 65 /* opt_nosmp: If true, secondary processors are ignored. */
kaf24@5900 66 static int opt_nosmp = 0;
kaf24@5146 67 boolean_param("nosmp", opt_nosmp);
kaf24@5146 68
kaf24@5146 69 /* maxcpus: maximum number of CPUs to activate. */
kaf24@5146 70 static unsigned int max_cpus = NR_CPUS;
shand@11156 71 integer_param("maxcpus", max_cpus);
kaf24@5146 72
kaf24@3334 73 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
kaf24@3334 74 static int opt_watchdog = 0;
kaf24@3334 75 boolean_param("watchdog", opt_watchdog);
kaf24@3334 76
kaf24@4850 77 /* **** Linux config option: propagated to domain0. */
kaf24@4850 78 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
kaf24@4850 79 /* "acpi=force": Override the disable blacklist. */
kaf24@4850 80 /* "acpi=strict": Disables out-of-spec workarounds. */
kaf24@4850 81 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
kaf24@4850 82 /* "acpi=noirq": Disables ACPI interrupt routing. */
kaf24@4850 83 static void parse_acpi_param(char *s);
kaf24@4850 84 custom_param("acpi", parse_acpi_param);
kaf24@4850 85
kaf24@4850 86 /* **** Linux config option: propagated to domain0. */
kaf24@4850 87 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
kaf24@4850 88 extern int acpi_skip_timer_override;
kaf24@4850 89 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
kaf24@4850 90
kaf24@4850 91 /* **** Linux config option: propagated to domain0. */
kaf24@4850 92 /* noapic: Disable IOAPIC setup. */
kaf24@4850 93 extern int skip_ioapic_setup;
kaf24@4850 94 boolean_param("noapic", skip_ioapic_setup);
kaf24@4850 95
kaf24@3594 96 int early_boot = 1;
kaf24@3594 97
kaf24@5146 98 cpumask_t cpu_present_map;
kaf24@5146 99
kfraser@15074 100 unsigned long xen_phys_start;
kfraser@15074 101
kaf24@5003 102 /* Limits of Xen heap, used to initialise the allocator. */
kaf24@5003 103 unsigned long xenheap_phys_start, xenheap_phys_end;
kaf24@3338 104
kaf24@2298 105 extern void arch_init_memory(void);
kaf24@1589 106 extern void init_IRQ(void);
kaf24@1589 107 extern void trap_init(void);
kaf24@5604 108 extern void early_time_init(void);
kaf24@5167 109 extern void early_cpu_init(void);
kfraser@15747 110 extern void vesa_init(void);
kfraser@15747 111 extern void vesa_mtrr_init(void);
kaf24@1589 112
kaf24@8533 113 struct tss_struct init_tss[NR_CPUS];
kaf24@8533 114
kfraser@15490 115 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
kaf24@5011 116
kaf24@5214 117 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
kaf24@1452 118
kaf24@5237 119 #if CONFIG_PAGING_LEVELS > 2
kaf24@1670 120 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
kaf24@1670 121 #else
kaf24@5593 122 unsigned long mmu_cr4_features = X86_CR4_PSE;
kaf24@1670 123 #endif
kaf24@1452 124 EXPORT_SYMBOL(mmu_cr4_features);
kaf24@1452 125
kaf24@4818 126 int acpi_disabled;
kaf24@1452 127
kaf24@4850 128 int acpi_force;
kaf24@4850 129 char acpi_param[10] = "";
/*
 * Handler for the "acpi=" boot parameter (registered via custom_param()).
 *
 * @s: the value following "acpi=" on the command line.
 *
 * The raw value is first copied into acpi_param (a 10-byte buffer), then the
 * recognised values "off", "force", "strict", "ht" and "noirq" are applied.
 * Any other value is stored but has no further effect in this function.
 * NOTE(review): truncation of over-long values depends on safe_strcpy(),
 * which is not visible here -- presumably bounded by the destination size.
 */
kfraser@15074 130 static void __init parse_acpi_param(char *s)
kaf24@4850 131 {
kaf24@4850 132 /* Save the parameter so it can be propagated to domain0. */
kfraser@13689 133 safe_strcpy(acpi_param, s);
kaf24@4850 134
kaf24@4850 135 /* Interpret the parameter for use within Xen. */
kaf24@4850 136 if ( !strcmp(s, "off") )
kaf24@4850 137 {
kaf24@4850 138 disable_acpi();
kaf24@4850 139 }
kaf24@4850 140 else if ( !strcmp(s, "force") )
kaf24@4850 141 {
/* "force" sets the force and HT flags and clears acpi_disabled. */
kaf24@4850 142 acpi_force = 1;
kaf24@4850 143 acpi_ht = 1;
kaf24@4850 144 acpi_disabled = 0;
kaf24@4850 145 }
kaf24@4850 146 else if ( !strcmp(s, "strict") )
kaf24@4850 147 {
kaf24@4850 148 acpi_strict = 1;
kaf24@4850 149 }
kaf24@4850 150 else if ( !strcmp(s, "ht") )
kaf24@4850 151 {
/* "ht" disables ACPI unless acpi_force has already been set. */
kaf24@4850 152 if ( !acpi_force )
kaf24@4850 153 disable_acpi();
kaf24@4850 154 acpi_ht = 1;
kaf24@4850 155 }
kaf24@4850 156 else if ( !strcmp(s, "noirq") )
kaf24@4850 157 {
kaf24@4850 158 acpi_noirq_set();
kaf24@4850 159 }
kaf24@4850 160 }
kaf24@4850 161
/*
 * Call every function pointer stored in the table delimited by
 * __initcall_start and __initcall_end, in ascending address order.
 * Return values of the called functions are ignored.
 */
kaf24@1452 162 static void __init do_initcalls(void)
kaf24@1452 163 {
kaf24@1452 164 initcall_t *call;
kaf24@1452 165 for ( call = &__initcall_start; call < &__initcall_end; call++ )
kaf24@1452 166 (*call)();
kaf24@1452 167 }
kaf24@1452 168
/*
 * EARLY_FAIL(fmt, args...): print a printf-style message with printk() and
 * then spin forever executing "hlt". Control never continues past the macro.
 * Uses the GNU named-variadic-macro form (a...) and ", ## a" so that it can
 * be invoked with a format string only.
 */
kfraser@15074 169 #define EARLY_FAIL(f, a...) do { \
kfraser@15074 170 printk( f , ## a ); \
kfraser@15074 171 for ( ; ; ) __asm__ __volatile__ ( "hlt" ); \
kfraser@15074 172 } while (0)
kaf24@8459 173
kfraser@15074 174 static unsigned long __initdata initial_images_start, initial_images_end;
kaf24@9067 175
/*
 * Return the number of pages between initial_images_start and
 * initial_images_end (end page number minus start page number).
 * Both bounds are asserted to be page-aligned.
 */
kfraser@15074 176 unsigned long __init initial_images_nrpages(void)
kaf24@9067 177 {
kfraser@15489 178 ASSERT(!(initial_images_start & ~PAGE_MASK));
kfraser@15489 179 ASSERT(!(initial_images_end & ~PAGE_MASK));
kfraser@15489 180 return ((initial_images_end >> PAGE_SHIFT) -
kfraser@15489 181 (initial_images_start >> PAGE_SHIFT));
kaf24@9067 182 }
kaf24@9067 183
/*
 * Pass the range [initial_images_start, initial_images_end) to
 * init_domheap_pages().
 * NOTE(review): presumably this releases the memory that held the boot
 * modules to the domain heap -- confirm against init_domheap_pages().
 */
kfraser@15074 184 void __init discard_initial_images(void)
kaf24@9067 185 {
kaf24@9067 186 init_domheap_pages(initial_images_start, initial_images_end);
kaf24@9067 187 }
kaf24@9067 188
kaf24@9818 189 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
kaf24@9818 190
/*
 * Set up the per-cpu data areas.
 *
 * The data image at __per_cpu_start (data_size bytes, which must not exceed
 * PERCPU_SIZE) is copied into slot i, at offset (i << PERCPU_SHIFT), for each
 * CPU i >= 1 up to the first CPU that is not marked possible. The function
 * BUG()s if any CPU after that point is possible, i.e. the possible CPUs are
 * required to be contiguous from 0. The slots from first_unused onwards are
 * then handed to init_xenheap_pages() (unless MEMORY_GUARD is defined) and
 * passed to memguard_guard_range().
 */
kfraser@11241 191 static void __init percpu_init_areas(void)
kaf24@9818 192 {
kaf24@9818 193 unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
kfraser@15074 194 unsigned int first_unused;
kaf24@9818 195
kaf24@9818 196 BUG_ON(data_size > PERCPU_SIZE);
kaf24@9818 197
kfraser@15074 198 /* Initialise per-cpu data area for all possible secondary CPUs. */
kfraser@15074 199 for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
kfraser@15074 200 memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@15074 201 __per_cpu_start,
kfraser@15074 202 data_size);
/* i is now the first CPU index that is not possible (or NR_CPUS). */
kaf24@9818 203 first_unused = i;
kaf24@9818 204
kfraser@14340 205 /* Check that there are no holes in cpu_possible_map. */
kaf24@9818 206 for ( ; i < NR_CPUS; i++ )
kfraser@14340 207 BUG_ON(cpu_possible(i));
kaf24@9818 208
kfraser@11241 209 #ifndef MEMORY_GUARD
kaf24@9818 210 init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
kaf24@9818 211 __pa(__per_cpu_end));
kfraser@11241 212 #endif
/* Guard the unused slots; this call is made with or without MEMORY_GUARD. */
keir@15082 213 memguard_guard_range(&__per_cpu_start[first_unused << PERCPU_SHIFT],
keir@15082 214 (NR_CPUS - first_unused) << PERCPU_SHIFT);
keir@15082 215 #if defined(CONFIG_X86_64)
keir@15082 216 /* Also zap the mapping in the 1:1 area. */
keir@15082 217 memguard_guard_range(__va(__pa(__per_cpu_start)) +
keir@15082 218 (first_unused << PERCPU_SHIFT),
keir@15082 219 (NR_CPUS - first_unused) << PERCPU_SHIFT);
keir@15082 220 #endif
kaf24@9818 221 }
kaf24@9818 222
/*
 * Search the multiboot modules for an ACM binary policy.
 *
 * @mbi:           multiboot information structure (mods_addr, mods_count).
 * @initrdidx:     in/out index of the initrd module; rewritten only when the
 *                 policy turns out to be module 1.
 * @_policy_start: out, virtual address of the policy; written only on a match.
 * @_policy_len:   out, policy length in bytes; written only on a match.
 *
 * Modules are tested with acm_is_policy() from the last one down to module 1;
 * module 0 is never tested. Each module is looked up at its offset from
 * mod[0].mod_start, applied to initial_images_start. The search stops at the
 * first match. If nothing matches, no output is modified.
 */
kfraser@11881 223 /* Fetch acm policy module from multiboot modules. */
kfraser@15074 224 static void __init extract_acm_policy(
kfraser@11881 225 multiboot_info_t *mbi,
kfraser@11881 226 unsigned int *initrdidx,
kfraser@11881 227 char **_policy_start,
kfraser@11881 228 unsigned long *_policy_len)
kfraser@11881 229 {
kfraser@11881 230 int i;
kfraser@11881 231 module_t *mod = (module_t *)__va(mbi->mods_addr);
kfraser@11881 232 unsigned long start, policy_len;
kfraser@11881 233 char *policy_start;
kfraser@11881 234
kfraser@11881 235 /*
kfraser@11881 236 * Try all modules and see whichever could be the binary policy.
kfraser@11881 237 * Adjust the initrdidx if module[1] is the binary policy.
kfraser@11881 238 */
kfraser@11881 239 for ( i = mbi->mods_count-1; i >= 1; i-- )
kfraser@11881 240 {
kfraser@11881 241 start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
kfraser@15074 242 policy_start = maddr_to_bootstrap_virt(start);
kfraser@11881 243 policy_len = mod[i].mod_end - mod[i].mod_start;
kfraser@11881 244 if ( acm_is_policy(policy_start, policy_len) )
kfraser@11881 245 {
kfraser@11881 246 printk("Policy len 0x%lx, start at %p - module %d.\n",
kfraser@11881 247 policy_len, policy_start, i);
kfraser@11881 248 *_policy_start = policy_start;
kfraser@11881 249 *_policy_len = policy_len;
/* Policy occupies slot 1: use module 2 as initrd if present, else 0. */
kfraser@11881 250 if ( i == 1 )
kfraser@11881 251 *initrdidx = (mbi->mods_count > 2) ? 2 : 0;
kfraser@11881 252 break;
kfraser@11881 253 }
kfraser@11881 254 }
kfraser@11881 255 }
kfraser@11881 256
/*
 * Create the idle domain and make its VCPU 0 the current VCPU.
 *
 * Calls scheduler_init(), creates a domain with IDLE_DOMAIN_ID and allocates
 * its VCPU 0, BUG()ing if either step fails. That VCPU becomes current and is
 * recorded in idle_vcpu[0] and in this CPU's curr_vcpu; finally
 * setup_idle_pagetable() is called.
 */
kfraser@11241 257 static void __init init_idle_domain(void)
kfraser@11240 258 {
kfraser@11240 259 struct domain *idle_domain;
kfraser@11240 260
kfraser@11240 261 /* Domain creation requires that scheduler structures are initialised. */
kfraser@11240 262 scheduler_init();
kfraser@11240 263
kfraser@14911 264 idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
kfraser@11240 265 if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
kfraser@11240 266 BUG();
kfraser@11240 267
kfraser@11240 268 set_current(idle_domain->vcpu[0]);
kfraser@11240 269 idle_vcpu[0] = this_cpu(curr_vcpu) = current;
kfraser@11240 270
kfraser@11240 271 setup_idle_pagetable();
kfraser@11240 272 }
kfraser@11240 273
/*
 * Assign @cpu to a NUMA node.
 *
 * The node is looked up in apicid_to_node[] using the CPU's APIC ID from
 * x86_cpu_to_apicid[]; NUMA_NO_NODE is replaced by node 0. The result is
 * recorded with numa_set_node() and logged when acpi_numa > 0.
 * NOTE(review): 'node' is unsigned but is printed with "%d".
 */
kfraser@15074 274 static void __init srat_detect_node(int cpu)
kfraser@11971 275 {
kfraser@11998 276 unsigned node;
kfraser@11998 277 u8 apicid = x86_cpu_to_apicid[cpu];
kfraser@11971 278
kfraser@11998 279 node = apicid_to_node[apicid];
kfraser@11998 280 if ( node == NUMA_NO_NODE )
kfraser@11998 281 node = 0;
kfraser@11998 282 numa_set_node(cpu, node);
kfraser@11971 283
kfraser@11998 284 if ( acpi_numa > 0 )
kfraser@11998 285 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
kfraser@11971 286 }
kfraser@11971 287
kfraser@15740 288 /*
kfraser@15740 289 * Ensure a given physical memory range is present in the bootstrap mappings.
kfraser@15740 290 * Use superpage mappings to ensure that pagetable memory needn't be allocated.
kfraser@15740 291 */
/*
 * @start, @end: physical byte addresses of the range. @start is rounded down
 * and @end rounded up to a (1 << L2_PAGETABLE_SHIFT) boundary before mapping.
 * Panics if the rounded end lies beyond BOOTSTRAP_DIRECTMAP_END. The return
 * value of map_pages_to_xen() is not checked.
 */
kfraser@15740 292 static void __init bootstrap_map(unsigned long start, unsigned long end)
kfraser@15740 293 {
kfraser@15740 294 unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
kfraser@15740 295 start = start & ~mask;
kfraser@15740 296 end = (end + mask) & ~mask;
kfraser@15740 297 if ( end > BOOTSTRAP_DIRECTMAP_END )
kfraser@15740 298 panic("Cannot access memory beyond end of "
kfraser@15740 299 "bootstrap direct-map area\n");
kfraser@15740 300 map_pages_to_xen(
kfraser@15740 301 (unsigned long)maddr_to_bootstrap_virt(start),
kfraser@15740 302 start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kfraser@15740 303 }
kfraser@15740 304
/*
 * Copy the physical range [src_start, src_end) to physical address @dst.
 *
 * Both the source and the destination range are first mapped with
 * bootstrap_map(). memmove() is used for the copy, so the two ranges may
 * overlap.
 */
kfraser@15074 305 static void __init move_memory(
kfraser@15074 306 unsigned long dst, unsigned long src_start, unsigned long src_end)
ian@12677 307 {
kfraser@15740 308 bootstrap_map(src_start, src_end);
kfraser@15740 309 bootstrap_map(dst, dst + src_end - src_start);
kfraser@15074 310 memmove(maddr_to_bootstrap_virt(dst),
kfraser@15074 311 maddr_to_bootstrap_virt(src_start),
ian@12677 312 src_end - src_start);
kfraser@15074 313 }
kfraser@15074 314
kfraser@15074 315 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
kfraser@15074 316 static struct e820map __initdata boot_e820;
kfraser@15074 317
kfraser@15074 318 /* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
/*
 * Returns 1 if a single boot_e820 entry fully contains [s, e) and the range
 * was carved out of it, 0 if no entry contains the whole range (the map is
 * then left unchanged). The entry type is not examined.
 *
 * On success the containing entry is shrunk to [rs, s), which may leave it
 * with size 0, and, if e < re, a new entry [e, re) is inserted right after it.
 * NOTE(review): the insertion grows nr_map by one with no capacity check
 * visible here -- confirm that boot_e820.map always has a spare slot.
 */
kfraser@15489 319 static int __init reserve_in_boot_e820(unsigned long s, unsigned long e)
kfraser@15074 320 {
kfraser@15403 321 uint64_t rs, re;
kfraser@15074 322 int i;
kfraser@15074 323
kfraser@15074 324 for ( i = 0; i < boot_e820.nr_map; i++ )
kfraser@15074 325 {
kfraser@15074 326 /* Have we found the e820 region that includes the specified range? */
kfraser@15074 327 rs = boot_e820.map[i].addr;
kfraser@15403 328 re = rs + boot_e820.map[i].size;
kfraser@15489 329 if ( (s >= rs) && (e <= re) )
kfraser@15489 330 goto found;
kfraser@15489 331 }
kfraser@15074 332
kfraser@15489 333 return 0;
kfraser@15489 334
kfraser@15489 335 found:
kfraser@15489 336 /* Start fragment. */
kfraser@15489 337 boot_e820.map[i].size = s - rs;
kfraser@15489 338
kfraser@15489 339 /* End fragment. */
kfraser@15489 340 if ( e < re )
kfraser@15489 341 {
/* Shift entries i..nr_map-1 up by one slot; slot i+1 is rewritten below. */
kfraser@15489 342 memmove(&boot_e820.map[i+1], &boot_e820.map[i],
kfraser@15489 343 (boot_e820.nr_map-i) * sizeof(boot_e820.map[0]));
kfraser@15489 344 boot_e820.nr_map++;
kfraser@15489 345 i++;
kfraser@15489 346 boot_e820.map[i].addr = e;
kfraser@15489 347 boot_e820.map[i].size = re - e;
kfraser@15074 348 }
kfraser@15489 349
kfraser@15489 350 return 1;
ian@12677 351 }
ian@12677 352
/*
 * Video state record read by parse_video_info() from the boot_vid_info
 * symbol. The trailing comment on each field gives its byte offset within
 * the structure.
 * NOTE(review): the layout presumably has to match whatever early boot code
 * fills in boot_vid_info -- confirm before reordering or resizing fields.
 */
keir@15298 353 struct boot_video_info {
keir@15298 354 u8 orig_x; /* 0x00 */
keir@15298 355 u8 orig_y; /* 0x01 */
keir@15298 356 u8 orig_video_mode; /* 0x02 */
keir@15298 357 u8 orig_video_cols; /* 0x03 */
keir@15298 358 u8 orig_video_lines; /* 0x04 */
keir@15298 359 u8 orig_video_isVGA; /* 0x05 */
keir@15298 360 u16 orig_video_points; /* 0x06 */
keir@15298 361
keir@15298 362 /* VESA graphic mode -- linear frame buffer */
keir@15298 363 u32 capabilities; /* 0x08 */
keir@15298 364 u16 lfb_linelength; /* 0x0c */
keir@15298 365 u16 lfb_width; /* 0x0e */
keir@15298 366 u16 lfb_height; /* 0x10 */
keir@15298 367 u16 lfb_depth; /* 0x12 */
keir@15298 368 u32 lfb_base; /* 0x14 */
keir@15298 369 u32 lfb_size; /* 0x18 */
keir@15298 370 u8 red_size; /* 0x1c */
keir@15298 371 u8 red_pos; /* 0x1d */
keir@15298 372 u8 green_size; /* 0x1e */
keir@15298 373 u8 green_pos; /* 0x1f */
keir@15298 374 u8 blue_size; /* 0x20 */
keir@15298 375 u8 blue_pos; /* 0x21 */
keir@15298 376 u8 rsvd_size; /* 0x22 */
keir@15298 377 u8 rsvd_pos; /* 0x23 */
keir@15298 378 u16 vesapm_seg; /* 0x24 */
keir@15298 379 u16 vesapm_off; /* 0x26 */
keir@15298 380 };
keir@15298 381
/*
 * Translate the boot-time video record (boot_vid_info) into
 * vga_console_info.
 *
 * Two cases are recognised: orig_video_isVGA == 1 with video mode 3 is
 * recorded as XEN_VGATYPE_TEXT_MODE_3, and orig_video_isVGA == 0x23 is
 * recorded as XEN_VGATYPE_VESA_LFB. In any other case vga_console_info is
 * left untouched.
 */
keir@15298 382 static void __init parse_video_info(void)
keir@15298 383 {
keir@15298 384 struct boot_video_info *bvi = &bootsym(boot_vid_info);
keir@15298 385
keir@15298 386 if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) )
keir@15298 387 {
keir@15298 388 vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
keir@15298 389 vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points;
keir@15298 390 vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x;
keir@15298 391 vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y;
keir@15298 392 vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines;
keir@15298 393 vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols;
keir@15298 394 }
keir@15298 395 else if ( bvi->orig_video_isVGA == 0x23 )
keir@15298 396 {
keir@15298 397 vga_console_info.video_type = XEN_VGATYPE_VESA_LFB;
keir@15298 398 vga_console_info.u.vesa_lfb.width = bvi->lfb_width;
keir@15298 399 vga_console_info.u.vesa_lfb.height = bvi->lfb_height;
keir@15298 400 vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength;
keir@15298 401 vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth;
keir@15298 402 vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base;
keir@15298 403 vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size;
keir@15298 404 vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos;
keir@15298 405 vga_console_info.u.vesa_lfb.red_size = bvi->red_size;
keir@15298 406 vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos;
keir@15298 407 vga_console_info.u.vesa_lfb.green_size = bvi->green_size;
keir@15298 408 vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos;
keir@15298 409 vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
keir@15298 410 vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
keir@15298 411 vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
keir@15298 412 }
keir@15298 413 }
keir@15298 414
/*
 * Final step of initialisation: dispose of the init sections and enter the
 * idle loop.
 *
 * The range [__init_begin, __init_end) is handed to init_xenheap_pages()
 * unless MEMORY_GUARD is defined, and is passed to memguard_guard_range()
 * in either case. The size of the range is then logged in kB and
 * startup_cpu_idle_loop() is called.
 * NOTE(review): this function is not marked __init, presumably because it
 * releases the init section and must not live in it -- confirm.
 */
keir@15082 415 void init_done(void)
keir@15082 416 {
keir@15082 417 extern char __init_begin[], __init_end[];
keir@15082 418
keir@15082 419 /* Free (or page-protect) the init areas. */
keir@15082 420 #ifndef MEMORY_GUARD
keir@15082 421 init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
keir@15082 422 #endif
keir@15082 423 memguard_guard_range(__init_begin, __init_end - __init_begin);
keir@15082 424 #if defined(CONFIG_X86_64)
keir@15082 425 /* Also zap the mapping in the 1:1 area. */
keir@15082 426 memguard_guard_range(__va(__pa(__init_begin)), __init_end - __init_begin);
keir@15082 427 #endif
keir@15082 428 printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
keir@15082 429
keir@15082 430 startup_cpu_idle_loop();
keir@15082 431 }
keir@15082 432
/*
 * Strip the first word from a command line.
 *
 * @p: NUL-terminated command line, or NULL (treated as an empty string).
 *
 * Skips leading spaces, then the first run of non-space characters, then the
 * spaces that follow it, and returns a pointer to what remains (possibly the
 * terminating NUL). The input is not modified. Only ' ' counts as a
 * separator. For a NULL input the result points at a string literal.
 * NOTE(review): the skipped word is presumably the image name supplied by
 * the bootloader -- confirm.
 */
kfraser@15796 433 static char * __init cmdline_cook(char *p)
kfraser@15796 434 {
/* GNU "?:" shorthand: keep p if non-NULL, otherwise use "". */
kfraser@15796 435 p = p ? : "";
kfraser@15796 436 while ( *p == ' ' )
kfraser@15796 437 p++;
kfraser@15796 438 while ( (*p != ' ') && (*p != '\0') )
kfraser@15796 439 p++;
kfraser@15796 440 while ( *p == ' ' )
kfraser@15796 441 p++;
kfraser@15796 442 return p;
kfraser@15796 443 }
kfraser@15796 444
kfraser@15379 445 void __init __start_xen(unsigned long mbi_p)
kaf24@1452 446 {
kfraser@15293 447 char *memmap_type = NULL;
kfraser@15796 448 char *cmdline, *kextra;
kaf24@8457 449 unsigned long _initrd_start = 0, _initrd_len = 0;
kaf24@8457 450 unsigned int initrdidx = 1;
kfraser@11881 451 char *_policy_start = NULL;
kfraser@11881 452 unsigned long _policy_len = 0;
kfraser@15379 453 multiboot_info_t *mbi = __va(mbi_p);
kaf24@8457 454 module_t *mod = (module_t *)__va(mbi->mods_addr);
kaf24@6111 455 unsigned long nr_pages, modules_length;
kfraser@15293 456 int i, e820_warn = 0, bytes = 0;
kaf24@5776 457 struct ns16550_defaults ns16550 = {
kaf24@5776 458 .data_bits = 8,
kaf24@5776 459 .parity = 'n',
kaf24@5776 460 .stop_bits = 1
kaf24@5776 461 };
kaf24@3338 462
kfraser@12853 463 extern void early_page_fault(void);
kfraser@12853 464 set_intr_gate(TRAP_page_fault, &early_page_fault);
kfraser@12853 465
kaf24@3338 466 /* Parse the command-line options. */
kfraser@15796 467 cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ?
kfraser@15796 468 __va(mbi->cmdline) : NULL);
kfraser@15426 469 if ( (kextra = strstr(cmdline, " -- ")) != NULL )
kfraser@15426 470 {
kfraser@15426 471 /*
kfraser@15426 472 * Options after ' -- ' separator belong to dom0.
kfraser@15426 473 * 1. Orphan dom0's options from Xen's command line.
kfraser@15426 474 * 2. Skip all but final leading space from dom0's options.
kfraser@15426 475 */
kfraser@15426 476 *kextra = '\0';
kfraser@15426 477 kextra += 3;
kfraser@15426 478 while ( kextra[1] == ' ' ) kextra++;
kfraser@15426 479 }
kaf24@9823 480 cmdline_parse(cmdline);
kaf24@3338 481
keir@15298 482 parse_video_info();
keir@15298 483
kaf24@8534 484 set_current((struct vcpu *)0xfffff000); /* debug sanity */
kfraser@11240 485 idle_vcpu[0] = current;
kaf24@8534 486 set_processor_id(0); /* needed early, for smp_processor_id() */
kaf24@3338 487
kaf24@5146 488 smp_prepare_boot_cpu();
kaf24@5146 489
kaf24@3338 490 /* We initialise the serial devices very early so we can get debugging. */
kaf24@5776 491 ns16550.io_base = 0x3f8;
kaf24@5776 492 ns16550.irq = 4;
kaf24@5776 493 ns16550_init(0, &ns16550);
kaf24@5776 494 ns16550.io_base = 0x2f8;
kaf24@5776 495 ns16550.irq = 3;
kaf24@5776 496 ns16550_init(1, &ns16550);
kaf24@5195 497 serial_init_preirq();
kaf24@3338 498
kaf24@3338 499 init_console();
kaf24@3338 500
kfraser@11947 501 printk("Command line: %s\n", cmdline);
kaf24@9823 502
kfraser@15330 503 printk("Video information:\n");
kfraser@15330 504
kfraser@15330 505 /* Print VGA display mode information. */
keir@15298 506 switch ( vga_console_info.video_type )
keir@15298 507 {
keir@15298 508 case XEN_VGATYPE_TEXT_MODE_3:
kfraser@15330 509 printk(" VGA is text mode %dx%d, font 8x%d\n",
keir@15298 510 vga_console_info.u.text_mode_3.columns,
keir@15298 511 vga_console_info.u.text_mode_3.rows,
keir@15298 512 vga_console_info.u.text_mode_3.font_height);
keir@15298 513 break;
keir@15298 514 case XEN_VGATYPE_VESA_LFB:
kfraser@15330 515 printk(" VGA is graphics mode %dx%d, %d bpp\n",
keir@15298 516 vga_console_info.u.vesa_lfb.width,
keir@15298 517 vga_console_info.u.vesa_lfb.height,
keir@15298 518 vga_console_info.u.vesa_lfb.bits_per_pixel);
keir@15298 519 break;
kfraser@15330 520 default:
kfraser@15330 521 printk(" No VGA detected\n");
kfraser@15330 522 break;
kfraser@15330 523 }
kfraser@15330 524
kfraser@15330 525 /* Print VBE/DDC EDID information. */
kfraser@15330 526 if ( bootsym(boot_edid_caps) != 0x1313 )
kfraser@15330 527 {
kfraser@15330 528 u16 caps = bootsym(boot_edid_caps);
kfraser@15330 529 printk(" VBE/DDC methods:%s%s%s; ",
kfraser@15330 530 (caps & 1) ? " V1" : "",
kfraser@15330 531 (caps & 2) ? " V2" : "",
kfraser@15330 532 !(caps & 3) ? " none" : "");
kfraser@15330 533 printk("EDID transfer time: %d seconds\n", caps >> 8);
kfraser@15330 534 if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
kfraser@15330 535 {
kfraser@15330 536 printk(" EDID info not retrieved because ");
kfraser@15330 537 if ( !(caps & 3) )
kfraser@15330 538 printk("no DDC retrieval method detected\n");
kfraser@15330 539 else if ( (caps >> 8) > 5 )
kfraser@15330 540 printk("takes longer than 5 seconds\n");
kfraser@15330 541 else
kfraser@15330 542 printk("of reasons unknown\n");
kfraser@15330 543 }
keir@15298 544 }
keir@15298 545
kfraser@15336 546 printk("Disc information:\n");
kfraser@15336 547 printk(" Found %d MBR signatures\n",
kfraser@15430 548 bootsym(boot_mbr_signature_nr));
kfraser@15336 549 printk(" Found %d EDD information structures\n",
kfraser@15336 550 bootsym(boot_edd_info_nr));
kfraser@15336 551
kaf24@3344 552 /* Check that we have at least one Multiboot module. */
kaf24@3344 553 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
kfraser@15074 554 EARLY_FAIL("dom0 kernel not specified. "
kfraser@15074 555 "Check bootloader configuration.\n");
kaf24@5011 556
kaf24@5011 557 if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
kfraser@15074 558 EARLY_FAIL("Misaligned CPU0 stack.\n");
kaf24@3338 559
kfraser@11618 560 /*
kfraser@11618 561 * Since there are some stubs getting built on the stacks which use
kfraser@11618 562 * direct calls/jumps, the heap must be confined to the lower 2G so
kfraser@11618 563 * that those branches can reach their targets.
kfraser@11618 564 */
kfraser@11618 565 if ( opt_xenheap_megabytes > 2048 )
kfraser@11618 566 opt_xenheap_megabytes = 2048;
kaf24@3338 567
kfraser@15293 568 if ( e820_raw_nr != 0 )
kfraser@15292 569 {
kfraser@15293 570 memmap_type = "Xen-e820";
kfraser@15292 571 }
kfraser@15293 572 else if ( bootsym(lowmem_kb) )
kfraser@15292 573 {
kfraser@15293 574 memmap_type = "Xen-e801";
kfraser@15292 575 e820_raw[0].addr = 0;
kfraser@15293 576 e820_raw[0].size = bootsym(lowmem_kb) << 10;
kfraser@15292 577 e820_raw[0].type = E820_RAM;
kfraser@15292 578 e820_raw[1].addr = 0x100000;
kfraser@15293 579 e820_raw[1].size = bootsym(highmem_kb) << 10;
kfraser@15292 580 e820_raw[1].type = E820_RAM;
kfraser@15292 581 e820_raw_nr = 2;
kfraser@15292 582 }
kfraser@15292 583 else if ( mbi->flags & MBI_MEMMAP )
kaf24@3344 584 {
kfraser@15293 585 memmap_type = "Multiboot-e820";
kaf24@3344 586 while ( bytes < mbi->mmap_length )
kaf24@3344 587 {
kaf24@3344 588 memory_map_t *map = __va(mbi->mmap_addr + bytes);
kaf24@8402 589
kaf24@8402 590 /*
kaf24@8403 591 * This is a gross workaround for a BIOS bug. Some bootloaders do
kaf24@8402 592 * not write e820 map entries into pre-zeroed memory. This is
kaf24@8402 593 * okay if the BIOS fills in all fields of the map entry, but
kaf24@8402 594 * some broken BIOSes do not bother to write the high word of
kaf24@8402 595 * the length field if the length is smaller than 4GB. We
kaf24@8402 596 * detect and fix this by flagging sections below 4GB that
kaf24@8403 597 * appear to be larger than 4GB in size.
kaf24@8402 598 */
kaf24@8403 599 if ( (map->base_addr_high == 0) && (map->length_high != 0) )
kaf24@8402 600 {
kfraser@15292 601 if ( !e820_warn )
kfraser@15292 602 {
kfraser@15292 603 printk("WARNING: Buggy e820 map detected and fixed "
kfraser@15292 604 "(truncated length fields).\n");
kfraser@15292 605 e820_warn = 1;
kfraser@15292 606 }
kaf24@8402 607 map->length_high = 0;
kaf24@8402 608 }
kaf24@8402 609
kaf24@3344 610 e820_raw[e820_raw_nr].addr =
kaf24@3344 611 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
kaf24@3344 612 e820_raw[e820_raw_nr].size =
kaf24@3344 613 ((u64)map->length_high << 32) | (u64)map->length_low;
kfraser@15799 614 e820_raw[e820_raw_nr].type = map->type;
kaf24@3344 615 e820_raw_nr++;
kaf24@8402 616
kaf24@3344 617 bytes += map->size + 4;
kaf24@3344 618 }
kaf24@3344 619 }
kaf24@3344 620 else if ( mbi->flags & MBI_MEMLIMITS )
kaf24@3344 621 {
kfraser@15293 622 memmap_type = "Multiboot-e801";
kaf24@3344 623 e820_raw[0].addr = 0;
kaf24@3344 624 e820_raw[0].size = mbi->mem_lower << 10;
kaf24@3344 625 e820_raw[0].type = E820_RAM;
kaf24@3354 626 e820_raw[1].addr = 0x100000;
kaf24@3354 627 e820_raw[1].size = mbi->mem_upper << 10;
kaf24@3354 628 e820_raw[1].type = E820_RAM;
kaf24@3344 629 e820_raw_nr = 2;
kaf24@3344 630 }
kaf24@3344 631 else
kaf24@3344 632 {
kfraser@15074 633 EARLY_FAIL("Bootloader provided no memory information.\n");
kaf24@3344 634 }
kaf24@3344 635
kaf24@13427 636 /* Ensure that all E820 RAM regions are page-aligned and -sized. */
kaf24@13427 637 for ( i = 0; i < e820_raw_nr; i++ )
kaf24@13427 638 {
kaf24@13427 639 uint64_t s, e;
kfraser@15292 640
kaf24@13427 641 if ( e820_raw[i].type != E820_RAM )
kaf24@13427 642 continue;
kaf24@13427 643 s = PFN_UP(e820_raw[i].addr);
kaf24@13427 644 e = PFN_DOWN(e820_raw[i].addr + e820_raw[i].size);
kaf24@13427 645 e820_raw[i].size = 0; /* discarded later */
kaf24@13427 646 if ( s < e )
kaf24@13427 647 {
kaf24@13427 648 e820_raw[i].addr = s << PAGE_SHIFT;
kaf24@13427 649 e820_raw[i].size = (e - s) << PAGE_SHIFT;
kaf24@13427 650 }
kaf24@13427 651 }
kaf24@13427 652
kaf24@13427 653 /* Sanitise the raw E820 map to produce a final clean version. */
kfraser@15293 654 max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);
kaf24@3338 655
kfraser@15074 656 /*
kfraser@15074 657 * Create a temporary copy of the E820 map. Truncate it to above 16MB
kfraser@15074 658 * as anything below that is already mapped and has a statically-allocated
kfraser@15074 659 * purpose.
kfraser@15074 660 */
kfraser@15074 661 memcpy(&boot_e820, &e820, sizeof(e820));
kfraser@15074 662 for ( i = 0; i < boot_e820.nr_map; i++ )
kaf24@3338 663 {
kfraser@15074 664 uint64_t s, e, min = 16 << 20; /* 16MB */
kfraser@15074 665 s = boot_e820.map[i].addr;
kfraser@15074 666 e = boot_e820.map[i].addr + boot_e820.map[i].size;
kfraser@15074 667 if ( s >= min )
kfraser@15074 668 continue;
kfraser@15074 669 if ( e > min )
kaf24@6111 670 {
kfraser@15074 671 boot_e820.map[i].addr = min;
kfraser@15074 672 boot_e820.map[i].size = e - min;
kaf24@6111 673 }
kfraser@15074 674 else
kfraser@15074 675 boot_e820.map[i].type = E820_RESERVED;
kaf24@3338 676 }
kaf24@6111 677
kfraser@15074 678 /*
keir@15077 679 * Iterate backwards over all superpage-aligned RAM regions.
kfraser@15074 680 *
kfraser@15074 681 * We require superpage alignment because the boot allocator is not yet
kfraser@15074 682 * initialised. Hence we can only map superpages in the address range
kfraser@15074 683 * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
kfraser@15074 684 * dynamic allocation of pagetables.
kfraser@15074 685 *
kfraser@15074 686 * As well as mapping superpages in that range, in preparation for
kfraser@15074 687 * initialising the boot allocator, we also look for a region to which
kfraser@15074 688 * we can relocate the dom0 kernel and other multiboot modules. Also, on
kfraser@15074 689 * x86/64, we relocate Xen to higher memory.
kfraser@15074 690 */
kfraser@15074 691 modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
keir@15077 692 for ( i = boot_e820.nr_map-1; i >= 0; i-- )
kfraser@15074 693 {
kfraser@15074 694 uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
kaf24@6134 695
kfraser@15074 696 /* Superpage-aligned chunks up to BOOTSTRAP_DIRECTMAP_END, please. */
kfraser@15074 697 s = (boot_e820.map[i].addr + mask) & ~mask;
kfraser@15074 698 e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
kfraser@15074 699 e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
kfraser@15074 700 if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
kaf24@3354 701 continue;
kaf24@6111 702
kfraser@15074 703 /* Map the chunk. No memory will need to be allocated to do this. */
kfraser@15074 704 map_pages_to_xen(
kfraser@15074 705 (unsigned long)maddr_to_bootstrap_virt(s),
kfraser@15074 706 s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kaf24@6111 707
kfraser@14084 708 #if defined(CONFIG_X86_64)
kfraser@15074 709 /* Is the region suitable for relocating Xen? */
kfraser@15074 710 if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
kaf24@5003 711 {
kfraser@15074 712 extern l2_pgentry_t l2_xenmap[];
kfraser@15074 713 l4_pgentry_t *pl4e;
kfraser@15074 714 l3_pgentry_t *pl3e;
kfraser@15074 715 l2_pgentry_t *pl2e;
kfraser@15074 716 int i, j;
kfraser@15074 717
kfraser@15074 718 /* Select relocation address. */
kfraser@15074 719 e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
kfraser@15074 720 xen_phys_start = e;
kfraser@15292 721 bootsym(trampoline_xen_phys_start) = e;
kfraser@15074 722
kfraser@15074 723 /*
kfraser@15074 724 * Perform relocation to new physical address.
kfraser@15074 725 * Before doing so we must sync static/global data with main memory
kfraser@15074 726 * with a barrier(). After this we must *not* modify static/global
kfraser@15074 727 * data until after we have switched to the relocated pagetables!
kfraser@15074 728 */
kfraser@15074 729 barrier();
kfraser@15074 730 move_memory(e, 0, __pa(&_end) - xen_phys_start);
kfraser@15074 731
kfraser@15379 732 /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
kfraser@15379 733 memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
kfraser@15379 734
kfraser@15074 735 /* Walk initial pagetables, relocating page directory entries. */
kfraser@15074 736 pl4e = __va(__pa(idle_pg_table));
kfraser@15074 737 for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
kfraser@15074 738 {
kfraser@15074 739 if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
kfraser@15074 740 continue;
kfraser@15074 741 *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
kfraser@15074 742 xen_phys_start);
kfraser@15074 743 pl3e = l4e_to_l3e(*pl4e);
kfraser@15074 744 for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
kfraser@15074 745 {
kfraser@15074 746 /* Not present or already relocated? */
kfraser@15074 747 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
kfraser@15074 748 (l3e_get_pfn(*pl3e) > 0x1000) )
kfraser@15074 749 continue;
kfraser@15074 750 *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
kfraser@15074 751 xen_phys_start);
kfraser@15074 752 }
kfraser@15074 753 }
kfraser@15074 754
kfraser@15074 755 /* The only data mappings to be relocated are in the Xen area. */
kfraser@15074 756 pl2e = __va(__pa(l2_xenmap));
kfraser@15074 757 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
kfraser@15074 758 {
kfraser@15074 759 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
kfraser@15074 760 continue;
kfraser@15074 761 *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
kfraser@15074 762 xen_phys_start);
kfraser@15074 763 }
kfraser@15074 764
kfraser@15074 765 /* Re-sync the stack and then switch to relocated pagetables. */
kfraser@15074 766 asm volatile (
kfraser@15074 767 "rep movsb ; " /* re-sync the stack */
kfraser@15074 768 "movq %%cr4,%%rsi ; "
kfraser@15074 769 "andb $0x7f,%%sil ; "
kfraser@15074 770 "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
kfraser@15074 771 "movq %0,%%cr3 ; " /* CR3 == new pagetables */
kfraser@15074 772 "orb $0x80,%%sil ; "
kfraser@15074 773 "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
kfraser@15074 774 : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
kfraser@15074 775 "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
kaf24@5003 776 }
kaf24@5003 777 #endif
keir@15077 778
keir@15077 779 /* Is the region suitable for relocating the multiboot modules? */
keir@15077 780 if ( !initial_images_start && (s < e) && ((e-s) >= modules_length) )
keir@15077 781 {
kfraser@15489 782 initial_images_end = e;
kfraser@15489 783 e = (e - modules_length) & PAGE_MASK;
keir@15077 784 initial_images_start = e;
keir@15077 785 move_memory(initial_images_start,
keir@15077 786 mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
keir@15077 787 }
kfraser@15489 788
kfraser@15489 789 if ( !kexec_crash_area.start && (s < e) &&
kfraser@15489 790 ((e-s) >= kexec_crash_area.size) )
kfraser@15489 791 {
kfraser@15489 792 e = (e - kexec_crash_area.size) & PAGE_MASK;
kfraser@15489 793 kexec_crash_area.start = e;
kfraser@15489 794 }
kaf24@3354 795 }
kaf24@3354 796
kfraser@15074 797 if ( !initial_images_start )
kfraser@15074 798 EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
kfraser@15074 799 reserve_in_boot_e820(initial_images_start, initial_images_end);
kfraser@15074 800
kfraser@15074 801 /*
kfraser@15074 802 * With modules (and Xen itself, on x86/64) relocated out of the way, we
kfraser@15074 803 * can now initialise the boot allocator with some memory.
kfraser@15074 804 */
kfraser@15074 805 xenheap_phys_start = init_boot_allocator(__pa(&_end));
kfraser@15074 806 xenheap_phys_end = opt_xenheap_megabytes << 20;
kfraser@15074 807 #if defined(CONFIG_X86_64)
kfraser@15074 808 if ( !xen_phys_start )
kfraser@15074 809 EARLY_FAIL("Not enough memory to relocate Xen.\n");
kfraser@15074 810 xenheap_phys_end += xen_phys_start;
kfraser@15074 811 reserve_in_boot_e820(xen_phys_start,
kfraser@15074 812 xen_phys_start + (opt_xenheap_megabytes<<20));
kfraser@15074 813 init_boot_pages(1<<20, 16<<20); /* Initial seed: 15MB */
kfraser@15074 814 #else
kfraser@15074 815 init_boot_pages(xenheap_phys_end, 16<<20); /* Initial seed: 4MB */
kfraser@15074 816 #endif
kfraser@15074 817
kfraser@15489 818 if ( kexec_crash_area.size != 0 )
kfraser@15489 819 {
kfraser@15489 820 unsigned long kdump_start = kexec_crash_area.start;
kfraser@15489 821 unsigned long kdump_size = kexec_crash_area.size;
kfraser@15489 822
kfraser@15489 823 kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;
kfraser@15489 824
kfraser@15489 825 if ( !reserve_in_boot_e820(kdump_start, kdump_size) )
kfraser@15489 826 {
kfraser@15489 827 printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)"
kfraser@15489 828 "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
kfraser@15489 829 kexec_crash_area.start = kexec_crash_area.size = 0;
kfraser@15489 830 }
kfraser@15489 831 else
kfraser@15489 832 {
kfraser@15489 833 printk("Kdump: %luMB (%lukB) at 0x%lx\n",
kfraser@15489 834 kdump_size >> 20, kdump_size >> 10, kdump_start);
kfraser@15489 835 }
kfraser@15489 836 }
kfraser@15489 837
kfraser@15074 838 /*
kfraser@15074 839 * With the boot allocator now seeded, we can walk every RAM region and
kfraser@15074 840 * map it in its entirety (on x86/64, at least) and notify it to the
kfraser@15074 841 * boot allocator.
kfraser@15074 842 */
kfraser@15074 843 for ( i = 0; i < boot_e820.nr_map; i++ )
kfraser@15074 844 {
kfraser@15074 845 uint64_t s, e, map_e, mask = PAGE_SIZE - 1;
kfraser@15074 846
kfraser@15074 847 /* Only page alignment required now. */
kfraser@15074 848 s = (boot_e820.map[i].addr + mask) & ~mask;
kfraser@15074 849 e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
kfraser@15074 850 if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
kfraser@15074 851 continue;
kfraser@15074 852
kfraser@15074 853 /* Perform the mapping (truncated in 32-bit mode). */
kfraser@15074 854 map_e = e;
kfraser@15074 855 #if defined(CONFIG_X86_32)
kfraser@15074 856 map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
kfraser@15074 857 #endif
kfraser@15074 858 if ( s < map_e )
kfraser@15074 859 map_pages_to_xen(
kfraser@15074 860 (unsigned long)maddr_to_bootstrap_virt(s),
kfraser@15074 861 s >> PAGE_SHIFT, (map_e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kfraser@15074 862
kfraser@15074 863 init_boot_pages(s, e);
kfraser@15074 864 }
kfraser@15074 865
kaf24@5003 866 memguard_init();
kaf24@4950 867
kfraser@15074 868 nr_pages = 0;
kfraser@15074 869 for ( i = 0; i < e820.nr_map; i++ )
kfraser@15074 870 if ( e820.map[i].type == E820_RAM )
kfraser@15074 871 nr_pages += e820.map[i].size >> PAGE_SHIFT;
ian@12681 872 printk("System RAM: %luMB (%lukB)\n",
kaf24@3354 873 nr_pages >> (20 - PAGE_SHIFT),
kaf24@3354 874 nr_pages << (PAGE_SHIFT - 10));
kaf24@7220 875 total_pages = nr_pages;
kaf24@3354 876
kfraser@11296 877 /* Sanity check for unwanted bloat of certain hypercall structures. */
kfraser@11296 878 BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
kfraser@11296 879 sizeof(((struct xen_platform_op *)0)->u.pad));
kfraser@11296 880 BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
kfraser@11296 881 sizeof(((struct xen_domctl *)0)->u.pad));
kfraser@11296 882 BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
kfraser@11296 883 sizeof(((struct xen_sysctl *)0)->u.pad));
kaf24@7388 884
kaf24@9878 885 BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
kaf24@9878 886 BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
ack@13292 887 BUILD_BUG_ON(sizeof(struct vcpu_info) != 64);
kaf24@7744 888
ack@13291 889 #ifdef CONFIG_COMPAT
ack@13291 890 BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) !=
ack@13291 891 sizeof(((struct compat_platform_op *)0)->u.pad));
ack@13291 892 BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE);
ack@13292 893 BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64);
ack@13291 894 #endif
ack@13291 895
kfraser@10492 896 /* Check definitions in public headers match internal defs. */
kaf24@9878 897 BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
kaf24@8521 898 #ifdef HYPERVISOR_VIRT_END
kaf24@9878 899 BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
kaf24@8521 900 #endif
kfraser@10492 901 BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
kfraser@10492 902 BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);
kaf24@8521 903
kaf24@3354 904 init_frametable();
kaf24@3338 905
kfraser@11971 906 acpi_boot_table_init();
kfraser@11971 907
kfraser@11971 908 acpi_numa_init();
kfraser@11971 909
kfraser@11971 910 numa_initmem_init(0, max_page);
kfraser@11971 911
kaf24@6111 912 /* Initialise the Xen heap, skipping RAM holes. */
kfraser@15074 913 init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
kfraser@15074 914 nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
kfraser@15074 915 #ifdef __x86_64__
kfraser@15074 916 init_xenheap_pages(xen_phys_start, __pa(&_start));
kfraser@15074 917 nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
kfraser@15747 918 vesa_init();
kfraser@15074 919 #endif
kfraser@15074 920 xenheap_phys_start = xen_phys_start;
kaf24@6111 921 printk("Xen heap: %luMB (%lukB)\n",
kaf24@6111 922 nr_pages >> (20 - PAGE_SHIFT),
kaf24@6111 923 nr_pages << (PAGE_SHIFT - 10));
kaf24@3338 924
keir@14680 925 end_boot_allocator();
keir@14680 926
kaf24@3594 927 early_boot = 0;
kaf24@3338 928
kaf24@8459 929 early_cpu_init();
kaf24@8459 930
kaf24@8459 931 paging_init();
kaf24@8459 932
kaf24@8459 933 /* Unmap the first page of CPU0's stack. */
kaf24@8459 934 memguard_guard_stack(cpu0_stack);
kaf24@8459 935
kaf24@8459 936 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
kaf24@8459 937
kaf24@8459 938 if ( opt_watchdog )
kaf24@8459 939 nmi_watchdog = NMI_LOCAL_APIC;
kaf24@8459 940
kaf24@8459 941 sort_exception_tables();
kaf24@8459 942
kaf24@8459 943 find_smp_config();
kaf24@8459 944
kaf24@8459 945 dmi_scan_machine();
kaf24@8459 946
kaf24@8459 947 generic_apic_probe();
kaf24@8459 948
kaf24@8459 949 acpi_boot_init();
kaf24@8459 950
kfraser@11971 951 init_cpu_to_node();
kfraser@11971 952
kfraser@11241 953 if ( smp_found_config )
kaf24@8459 954 get_smp_config();
kaf24@8459 955
keir@15083 956 #ifdef CONFIG_X86_64
keir@15083 957 /* Low mappings were only needed for some BIOS table parsing. */
keir@15083 958 zap_low_mappings();
keir@15083 959 #endif
keir@15083 960
kaf24@8459 961 init_apic_mappings();
kaf24@8459 962
kaf24@8459 963 init_IRQ();
kaf24@8459 964
kfraser@11241 965 percpu_init_areas();
kfraser@11241 966
kfraser@15815 967 xsm_init(&initrdidx, mbi, initial_images_start);
kfraser@15815 968
kfraser@11240 969 init_idle_domain();
kfraser@11240 970
kaf24@8459 971 trap_init();
kaf24@8459 972
kaf24@13662 973 rcu_init();
kaf24@13662 974
kaf24@8586 975 timer_init();
kaf24@8459 976
kaf24@8459 977 early_time_init();
kaf24@8459 978
kaf24@8459 979 arch_init_memory();
kaf24@8459 980
kaf24@8459 981 identify_cpu(&boot_cpu_data);
kaf24@8459 982 if ( cpu_has_fxsr )
kaf24@8459 983 set_in_cr4(X86_CR4_OSFXSR);
kaf24@8459 984 if ( cpu_has_xmm )
kaf24@8459 985 set_in_cr4(X86_CR4_OSXMMEXCPT);
kfraser@15747 986 #ifdef CONFIG_X86_64
kfraser@15747 987 vesa_mtrr_init();
kfraser@15747 988 #endif
kaf24@8459 989
kaf24@8459 990 if ( opt_nosmp )
kaf24@8459 991 max_cpus = 0;
kaf24@8459 992
kaf24@8459 993 smp_prepare_cpus(max_cpus);
kaf24@8459 994
kaf24@8459 995 /*
kaf24@8459 996 * Initialise higher-level timer functions. We do this fairly late
kaf24@8459 997 * (post-SMP) because the time bases and scale factors need to be updated
kaf24@8459 998 * regularly, and SMP initialisation can cause a long delay with
kaf24@8459 999 * interrupts not yet enabled.
kaf24@8459 1000 */
kaf24@8459 1001 init_xen_time();
kaf24@8459 1002
kaf24@8459 1003 initialize_keytable();
kaf24@8459 1004
kaf24@8459 1005 serial_init_postirq();
kaf24@8459 1006
kaf24@8459 1007 BUG_ON(!local_irq_is_enabled());
kaf24@8459 1008
kaf24@8459 1009 for_each_present_cpu ( i )
kaf24@8459 1010 {
kaf24@8459 1011 if ( num_online_cpus() >= max_cpus )
kaf24@8459 1012 break;
kaf24@8459 1013 if ( !cpu_online(i) )
kaf24@13662 1014 {
kaf24@13662 1015 rcu_online_cpu(i);
kaf24@8459 1016 __cpu_up(i);
kaf24@13662 1017 }
kfraser@11971 1018
kfraser@11998 1019 /* Set up cpu_to_node[]. */
kfraser@11971 1020 srat_detect_node(i);
kfraser@11998 1021 /* Set up node_to_cpumask based on cpu_to_node[]. */
kfraser@11971 1022 numa_add_cpu(i);
kaf24@8459 1023 }
kaf24@8459 1024
kaf24@8459 1025 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
kaf24@8459 1026 smp_cpus_done(max_cpus);
kaf24@8459 1027
kaf24@9117 1028 initialise_gdb(); /* could be moved earlier */
kaf24@9117 1029
kaf24@8459 1030 do_initcalls();
kaf24@8459 1031
kaf24@8594 1032 if ( opt_watchdog )
kaf24@8594 1033 watchdog_enable();
kaf24@8459 1034
kfraser@11881 1035 /* Extract policy from multiboot. */
kfraser@11881 1036 extract_acm_policy(mbi, &initrdidx, &_policy_start, &_policy_len);
kfraser@11881 1037
kaf24@8459 1038 /* initialize access control security module */
kfraser@11881 1039 acm_init(_policy_start, _policy_len);
kaf24@8459 1040
kaf24@8459 1041 /* Create initial domain 0. */
kfraser@14911 1042 dom0 = domain_create(0, 0, DOM0_SSIDREF);
kfraser@10655 1043 if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
kaf24@8459 1044 panic("Error creating domain 0\n");
kaf24@8459 1045
kfraser@12210 1046 dom0->is_privileged = 1;
kfraser@12210 1047
kaf24@8459 1048 /* Grab the DOM0 command line. */
kaf24@8459 1049 cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
kfraser@15426 1050 if ( (cmdline != NULL) || (kextra != NULL) )
kaf24@8459 1051 {
kaf24@8459 1052 static char dom0_cmdline[MAX_GUEST_CMDLINE];
kaf24@8459 1053
kfraser@15796 1054 cmdline = cmdline_cook(cmdline);
kfraser@15796 1055 safe_strcpy(dom0_cmdline, cmdline);
kaf24@8459 1056
kfraser@15426 1057 if ( kextra != NULL )
kfraser@15426 1058 /* kextra always includes exactly one leading space. */
kfraser@15426 1059 safe_strcat(dom0_cmdline, kextra);
kfraser@15426 1060
kaf24@8459 1061 /* Append any extra parameters. */
kfraser@13691 1062 if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
kfraser@13691 1063 safe_strcat(dom0_cmdline, " noapic");
kaf24@8459 1064 if ( acpi_skip_timer_override &&
kfraser@13691 1065 !strstr(dom0_cmdline, "acpi_skip_timer_override") )
kfraser@13691 1066 safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
kfraser@13691 1067 if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
kaf24@8459 1068 {
kfraser@13691 1069 safe_strcat(dom0_cmdline, " acpi=");
kfraser@13691 1070 safe_strcat(dom0_cmdline, acpi_param);
kaf24@8459 1071 }
kfraser@13691 1072
kfraser@13691 1073 cmdline = dom0_cmdline;
kaf24@8459 1074 }
kaf24@8459 1075
kaf24@8459 1076 if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
kaf24@8459 1077 {
kaf24@8459 1078 _initrd_start = initial_images_start +
kaf24@8459 1079 (mod[initrdidx].mod_start - mod[0].mod_start);
kaf24@8459 1080 _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
kaf24@8459 1081 }
kaf24@8459 1082
kaf24@8459 1083 /*
kaf24@8459 1084 * We're going to setup domain0 using the module(s) that we stashed safely
kaf24@8459 1085 * above our heap. The second module, if present, is an initrd ramdisk.
kaf24@8459 1086 */
kaf24@8459 1087 if ( construct_dom0(dom0,
kaf24@8459 1088 initial_images_start,
kaf24@8459 1089 mod[0].mod_end-mod[0].mod_start,
kaf24@8459 1090 _initrd_start,
kaf24@8459 1091 _initrd_len,
kaf24@8459 1092 cmdline) != 0)
kaf24@8459 1093 panic("Could not set up DOM0 guest OS\n");
kaf24@8459 1094
kaf24@8459 1095 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
kaf24@8459 1096 scrub_heap_pages();
kaf24@8459 1097
kaf24@8459 1098 init_trace_bufs();
kaf24@8459 1099
kaf24@10502 1100 console_endboot();
kaf24@8459 1101
kaf24@8459 1102 /* Hide UART from DOM0 if we're using it */
kaf24@8459 1103 serial_endboot();
kaf24@8459 1104
kaf24@8459 1105 domain_unpause_by_systemcontroller(dom0);
kaf24@8459 1106
keir@15082 1107 reset_stack_and_jump(init_done);
kaf24@8459 1108 }
kaf24@8459 1109
ian@13763 1110 void arch_get_xen_caps(xen_capabilities_info_t *info)
iap10@6721 1111 {
kfraser@14997 1112 /* Interface name is always xen-3.0-* for Xen-3.x. */
kfraser@14997 1113 int major = 3, minor = 0;
keir@13754 1114 char s[32];
keir@13754 1115
ian@13763 1116 (*info)[0] = '\0';
iap10@6721 1117
kaf24@6725 1118 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
kaf24@6725 1119
keir@13754 1120 snprintf(s, sizeof(s), "xen-%d.%d-x86_32 ", major, minor);
ian@13763 1121 safe_strcat(*info, s);
keir@13754 1122 if ( hvm_enabled )
keir@13754 1123 {
keir@13754 1124 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1125 safe_strcat(*info, s);
kfraser@13685 1126 }
kaf24@6725 1127
kaf24@6725 1128 #elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
kaf24@6725 1129
keir@13754 1130 snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
ian@13763 1131 safe_strcat(*info, s);
kaf24@6725 1132 if ( hvm_enabled )
iap10@6721 1133 {
keir@13754 1134 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1135 safe_strcat(*info, s);
keir@13754 1136 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
ian@13763 1137 safe_strcat(*info, s);
iap10@6721 1138 }
iap10@6721 1139
kaf24@6725 1140 #elif defined(CONFIG_X86_64)
iap10@6721 1141
keir@13754 1142 snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor);
ian@13763 1143 safe_strcat(*info, s);
ack@13288 1144 #ifdef CONFIG_COMPAT
keir@13754 1145 snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
ian@13763 1146 safe_strcat(*info, s);
ack@13288 1147 #endif
kaf24@6725 1148 if ( hvm_enabled )
iap10@6721 1149 {
keir@13754 1150 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1151 safe_strcat(*info, s);
keir@13754 1152 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
ian@13763 1153 safe_strcat(*info, s);
keir@13754 1154 snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor);
ian@13763 1155 safe_strcat(*info, s);
iap10@6721 1156 }
kaf24@6725 1157
iap10@6721 1158 #endif
iap10@6721 1159 }
iap10@6721 1160
kaf24@3914 1161 /*
kaf24@3914 1162 * Local variables:
kaf24@3914 1163 * mode: C
kaf24@3914 1164 * c-set-style: "BSD"
kaf24@3914 1165 * c-basic-offset: 4
kaf24@3914 1166 * tab-width: 4
kaf24@3914 1167 * indent-tabs-mode: nil
kaf24@3988 1168 * End:
kaf24@3914 1169 */