ia64/xen-unstable

annotate xen/arch/x86/setup.c @ 11998:3db344ea8070

[XEN] Clean up NUMA stuff and disable by default ('numa=on' enables it).
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Oct 25 15:17:40 2006 +0100 (2006-10-25)
parents f312c2d01d8b
children 7b5115221dfc
rev   line source
kaf24@1452 1 #include <xen/config.h>
kaf24@1452 2 #include <xen/init.h>
kaf24@1452 3 #include <xen/lib.h>
kaf24@1452 4 #include <xen/sched.h>
cl349@5247 5 #include <xen/domain.h>
kaf24@1452 6 #include <xen/serial.h>
kaf24@1506 7 #include <xen/softirq.h>
kaf24@1452 8 #include <xen/acpi.h>
kaf24@3338 9 #include <xen/console.h>
iap10@4287 10 #include <xen/serial.h>
kaf24@3338 11 #include <xen/trace.h>
kaf24@3338 12 #include <xen/multiboot.h>
kaf24@5356 13 #include <xen/domain_page.h>
kfraser@10890 14 #include <xen/version.h>
kaf24@9117 15 #include <xen/gdbstub.h>
kaf24@9818 16 #include <xen/percpu.h>
kfraser@11296 17 #include <xen/hypercall.h>
kfraser@11601 18 #include <xen/keyhandler.h>
kfraser@11971 19 #include <xen/numa.h>
iap10@6721 20 #include <public/version.h>
kaf24@1452 21 #include <asm/bitops.h>
kaf24@1452 22 #include <asm/smp.h>
kaf24@1452 23 #include <asm/processor.h>
kaf24@1452 24 #include <asm/mpspec.h>
kaf24@1452 25 #include <asm/apic.h>
kaf24@1452 26 #include <asm/desc.h>
kaf24@3338 27 #include <asm/shadow.h>
kaf24@3344 28 #include <asm/e820.h>
kaf24@5536 29 #include <acm/acm_hooks.h>
kaf24@3338 30
kaf24@5157 31 extern void dmi_scan_machine(void);
kaf24@5211 32 extern void generic_apic_probe(void);
kfraser@11971 33 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
kaf24@5157 34
kaf24@3338 35 /*
kaf24@3338 36 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
kaf24@8726 37 * page_info table and allocation bitmap.
kaf24@3338 38 */
kaf24@3338 39 static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
kaf24@4950 40 #if defined(CONFIG_X86_64)
kaf24@3338 41 integer_param("xenheap_megabytes", opt_xenheap_megabytes);
kaf24@3354 42 #endif
kaf24@1452 43
kaf24@5146 44 /* opt_nosmp: If true, secondary processors are ignored. */
kaf24@5900 45 static int opt_nosmp = 0;
kaf24@5146 46 boolean_param("nosmp", opt_nosmp);
kaf24@5146 47
kaf24@5146 48 /* maxcpus: maximum number of CPUs to activate. */
kaf24@5146 49 static unsigned int max_cpus = NR_CPUS;
shand@11156 50 integer_param("maxcpus", max_cpus);
kaf24@5146 51
kaf24@3334 52 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
kaf24@3334 53 static int opt_watchdog = 0;
kaf24@3334 54 boolean_param("watchdog", opt_watchdog);
kaf24@3334 55
kaf24@4850 56 /* **** Linux config option: propagated to domain0. */
kaf24@4850 57 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
kaf24@4850 58 /* "acpi=force": Override the disable blacklist. */
kaf24@4850 59 /* "acpi=strict": Disables out-of-spec workarounds. */
kaf24@4850 60 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
kaf24@4850 61 /* "acpi=noirq": Disables ACPI interrupt routing. */
kaf24@4850 62 static void parse_acpi_param(char *s);
kaf24@4850 63 custom_param("acpi", parse_acpi_param);
kaf24@4850 64
kaf24@4850 65 /* **** Linux config option: propagated to domain0. */
kaf24@4850 66 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
kaf24@4850 67 extern int acpi_skip_timer_override;
kaf24@4850 68 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
kaf24@4850 69
kaf24@4850 70 /* **** Linux config option: propagated to domain0. */
kaf24@4850 71 /* noapic: Disable IOAPIC setup. */
kaf24@4850 72 extern int skip_ioapic_setup;
kaf24@4850 73 boolean_param("noapic", skip_ioapic_setup);
kaf24@4850 74
kaf24@3594 75 int early_boot = 1;
kaf24@3594 76
kaf24@5146 77 cpumask_t cpu_present_map;
kaf24@5146 78
kaf24@5003 79 /* Limits of Xen heap, used to initialise the allocator. */
kaf24@5003 80 unsigned long xenheap_phys_start, xenheap_phys_end;
kaf24@3338 81
kaf24@2298 82 extern void arch_init_memory(void);
kaf24@1589 83 extern void init_IRQ(void);
kaf24@1589 84 extern void trap_init(void);
kaf24@5604 85 extern void early_time_init(void);
kaf24@5167 86 extern void early_cpu_init(void);
kaf24@1589 87
kaf24@8533 88 struct tss_struct init_tss[NR_CPUS];
kaf24@8533 89
kaf24@5011 90 extern unsigned long cpu0_stack[];
kaf24@5011 91
kaf24@5214 92 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
kaf24@1452 93
kaf24@5237 94 #if CONFIG_PAGING_LEVELS > 2
kaf24@1670 95 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
kaf24@1670 96 #else
kaf24@5593 97 unsigned long mmu_cr4_features = X86_CR4_PSE;
kaf24@1670 98 #endif
kaf24@1452 99 EXPORT_SYMBOL(mmu_cr4_features);
kaf24@1452 100
kaf24@4818 101 int acpi_disabled;
kaf24@1452 102
kaf24@4850 103 int acpi_force;
kaf24@4850 104 char acpi_param[10] = "";
kaf24@4850 105 static void parse_acpi_param(char *s)
kaf24@4850 106 {
kaf24@4850 107 /* Save the parameter so it can be propagated to domain0. */
kaf24@4850 108 strncpy(acpi_param, s, sizeof(acpi_param));
kaf24@4850 109 acpi_param[sizeof(acpi_param)-1] = '\0';
kaf24@4850 110
kaf24@4850 111 /* Interpret the parameter for use within Xen. */
kaf24@4850 112 if ( !strcmp(s, "off") )
kaf24@4850 113 {
kaf24@4850 114 disable_acpi();
kaf24@4850 115 }
kaf24@4850 116 else if ( !strcmp(s, "force") )
kaf24@4850 117 {
kaf24@4850 118 acpi_force = 1;
kaf24@4850 119 acpi_ht = 1;
kaf24@4850 120 acpi_disabled = 0;
kaf24@4850 121 }
kaf24@4850 122 else if ( !strcmp(s, "strict") )
kaf24@4850 123 {
kaf24@4850 124 acpi_strict = 1;
kaf24@4850 125 }
kaf24@4850 126 else if ( !strcmp(s, "ht") )
kaf24@4850 127 {
kaf24@4850 128 if ( !acpi_force )
kaf24@4850 129 disable_acpi();
kaf24@4850 130 acpi_ht = 1;
kaf24@4850 131 }
kaf24@4850 132 else if ( !strcmp(s, "noirq") )
kaf24@4850 133 {
kaf24@4850 134 acpi_noirq_set();
kaf24@4850 135 }
kaf24@4850 136 }
kaf24@4850 137
kaf24@1452 138 static void __init do_initcalls(void)
kaf24@1452 139 {
kaf24@1452 140 initcall_t *call;
kaf24@1452 141 for ( call = &__initcall_start; call < &__initcall_end; call++ )
kaf24@1452 142 (*call)();
kaf24@1452 143 }
kaf24@1452 144
kaf24@8459 145 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
kaf24@8459 146
kaf24@8459 147 static struct e820entry e820_raw[E820MAX];
kaf24@8459 148
kaf24@9067 149 static unsigned long initial_images_start, initial_images_end;
kaf24@9067 150
kaf24@9067 151 unsigned long initial_images_nrpages(void)
kaf24@9067 152 {
kaf24@9067 153 unsigned long s = initial_images_start + PAGE_SIZE - 1;
kaf24@9067 154 unsigned long e = initial_images_end;
kaf24@9067 155 return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
kaf24@9067 156 }
kaf24@9067 157
kaf24@9067 158 void discard_initial_images(void)
kaf24@9067 159 {
kaf24@9067 160 init_domheap_pages(initial_images_start, initial_images_end);
kaf24@9067 161 }
kaf24@9067 162
kaf24@9818 163 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
kaf24@9818 164
kfraser@11241 165 static void __init percpu_init_areas(void)
kaf24@9818 166 {
kaf24@9818 167 unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
kaf24@9818 168
kaf24@9818 169 BUG_ON(data_size > PERCPU_SIZE);
kaf24@9818 170
kfraser@11241 171 for_each_cpu ( i )
kfraser@11241 172 {
kfraser@11241 173 memguard_unguard_range(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@11241 174 1 << PERCPU_SHIFT);
kfraser@11241 175 if ( i != 0 )
kfraser@11241 176 memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@11241 177 __per_cpu_start,
kfraser@11241 178 data_size);
kfraser@11241 179 }
kaf24@9818 180 }
kaf24@9818 181
kfraser@11241 182 static void __init percpu_guard_areas(void)
kfraser@11241 183 {
kfraser@11241 184 memguard_guard_range(__per_cpu_start, __per_cpu_end - __per_cpu_start);
kfraser@11241 185 }
kfraser@11241 186
kfraser@11241 187 static void __init percpu_free_unused_areas(void)
kaf24@9818 188 {
kaf24@9818 189 unsigned int i, first_unused;
kaf24@9818 190
kaf24@9818 191 /* Find first unused CPU number. */
kaf24@9818 192 for ( i = 0; i < NR_CPUS; i++ )
kaf24@9818 193 if ( !cpu_online(i) )
kaf24@9818 194 break;
kaf24@9818 195 first_unused = i;
kaf24@9818 196
kaf24@9818 197 /* Check that there are no holes in cpu_online_map. */
kaf24@9818 198 for ( ; i < NR_CPUS; i++ )
kaf24@9818 199 BUG_ON(cpu_online(i));
kaf24@9818 200
kfraser@11241 201 #ifndef MEMORY_GUARD
kaf24@9818 202 init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
kaf24@9818 203 __pa(__per_cpu_end));
kfraser@11241 204 #endif
kaf24@9818 205 }
kaf24@9818 206
kfraser@11881 207 /* Fetch acm policy module from multiboot modules. */
kfraser@11881 208 static void extract_acm_policy(
kfraser@11881 209 multiboot_info_t *mbi,
kfraser@11881 210 unsigned int *initrdidx,
kfraser@11881 211 char **_policy_start,
kfraser@11881 212 unsigned long *_policy_len)
kfraser@11881 213 {
kfraser@11881 214 int i;
kfraser@11881 215 module_t *mod = (module_t *)__va(mbi->mods_addr);
kfraser@11881 216 unsigned long start, policy_len;
kfraser@11881 217 char *policy_start;
kfraser@11881 218
kfraser@11881 219 /*
kfraser@11881 220 * Try all modules and see whichever could be the binary policy.
kfraser@11881 221 * Adjust the initrdidx if module[1] is the binary policy.
kfraser@11881 222 */
kfraser@11881 223 for ( i = mbi->mods_count-1; i >= 1; i-- )
kfraser@11881 224 {
kfraser@11881 225 start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
kfraser@11881 226 #if defined(__i386__)
kfraser@11881 227 policy_start = (char *)start;
kfraser@11881 228 #elif defined(__x86_64__)
kfraser@11881 229 policy_start = __va(start);
kfraser@11881 230 #endif
kfraser@11881 231 policy_len = mod[i].mod_end - mod[i].mod_start;
kfraser@11881 232 if ( acm_is_policy(policy_start, policy_len) )
kfraser@11881 233 {
kfraser@11881 234 printk("Policy len 0x%lx, start at %p - module %d.\n",
kfraser@11881 235 policy_len, policy_start, i);
kfraser@11881 236 *_policy_start = policy_start;
kfraser@11881 237 *_policy_len = policy_len;
kfraser@11881 238 if ( i == 1 )
kfraser@11881 239 *initrdidx = (mbi->mods_count > 2) ? 2 : 0;
kfraser@11881 240 break;
kfraser@11881 241 }
kfraser@11881 242 }
kfraser@11881 243 }
kfraser@11881 244
kfraser@11241 245 static void __init init_idle_domain(void)
kfraser@11240 246 {
kfraser@11240 247 struct domain *idle_domain;
kfraser@11240 248
kfraser@11240 249 /* Domain creation requires that scheduler structures are initialised. */
kfraser@11240 250 scheduler_init();
kfraser@11240 251
kfraser@11240 252 idle_domain = domain_create(IDLE_DOMAIN_ID);
kfraser@11240 253 if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
kfraser@11240 254 BUG();
kfraser@11240 255
kfraser@11240 256 set_current(idle_domain->vcpu[0]);
kfraser@11240 257 idle_vcpu[0] = this_cpu(curr_vcpu) = current;
kfraser@11240 258
kfraser@11240 259 setup_idle_pagetable();
kfraser@11240 260 }
kfraser@11240 261
kfraser@11971 262 static void srat_detect_node(int cpu)
kfraser@11971 263 {
kfraser@11998 264 unsigned node;
kfraser@11998 265 u8 apicid = x86_cpu_to_apicid[cpu];
kfraser@11971 266
kfraser@11998 267 node = apicid_to_node[apicid];
kfraser@11998 268 if ( node == NUMA_NO_NODE )
kfraser@11998 269 node = 0;
kfraser@11998 270 numa_set_node(cpu, node);
kfraser@11971 271
kfraser@11998 272 if ( acpi_numa > 0 )
kfraser@11998 273 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
kfraser@11971 274 }
kfraser@11971 275
kaf24@8463 276 void __init __start_xen(multiboot_info_t *mbi)
kaf24@1452 277 {
kaf24@9823 278 char __cmdline[] = "", *cmdline = __cmdline;
kaf24@8457 279 unsigned long _initrd_start = 0, _initrd_len = 0;
kaf24@8457 280 unsigned int initrdidx = 1;
kfraser@11881 281 char *_policy_start = NULL;
kfraser@11881 282 unsigned long _policy_len = 0;
kaf24@8457 283 module_t *mod = (module_t *)__va(mbi->mods_addr);
kaf24@6111 284 unsigned long nr_pages, modules_length;
kaf24@8726 285 paddr_t s, e;
kaf24@8402 286 int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
kaf24@5776 287 struct ns16550_defaults ns16550 = {
kaf24@5776 288 .data_bits = 8,
kaf24@5776 289 .parity = 'n',
kaf24@5776 290 .stop_bits = 1
kaf24@5776 291 };
kaf24@3338 292
kaf24@3338 293 /* Parse the command-line options. */
kaf24@3344 294 if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
kaf24@9823 295 cmdline = __va(mbi->cmdline);
kaf24@9823 296 cmdline_parse(cmdline);
kaf24@3338 297
kaf24@8534 298 set_current((struct vcpu *)0xfffff000); /* debug sanity */
kfraser@11240 299 idle_vcpu[0] = current;
kaf24@8534 300 set_processor_id(0); /* needed early, for smp_processor_id() */
kaf24@3338 301
kaf24@5146 302 smp_prepare_boot_cpu();
kaf24@5146 303
kaf24@3338 304 /* We initialise the serial devices very early so we can get debugging. */
kaf24@5776 305 ns16550.io_base = 0x3f8;
kaf24@5776 306 ns16550.irq = 4;
kaf24@5776 307 ns16550_init(0, &ns16550);
kaf24@5776 308 ns16550.io_base = 0x2f8;
kaf24@5776 309 ns16550.irq = 3;
kaf24@5776 310 ns16550_init(1, &ns16550);
kaf24@5195 311 serial_init_preirq();
kaf24@3338 312
kaf24@3338 313 init_console();
kaf24@3338 314
kfraser@11947 315 printk("Command line: %s\n", cmdline);
kaf24@9823 316
kaf24@3344 317 /* Check that we have at least one Multiboot module. */
kaf24@3344 318 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
kaf24@3338 319 {
kaf24@5542 320 printk("FATAL ERROR: dom0 kernel not specified."
kaf24@5542 321 " Check bootloader configuration.\n");
kaf24@5011 322 EARLY_FAIL();
kaf24@5011 323 }
kaf24@5011 324
kaf24@5011 325 if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
kaf24@5011 326 {
kaf24@5011 327 printk("FATAL ERROR: Misaligned CPU0 stack.\n");
kaf24@5011 328 EARLY_FAIL();
kaf24@3338 329 }
kaf24@3338 330
kfraser@11618 331 /*
kfraser@11618 332 * Since there are some stubs getting built on the stacks which use
kfraser@11618 333 * direct calls/jumps, the heap must be confined to the lower 2G so
kfraser@11618 334 * that those branches can reach their targets.
kfraser@11618 335 */
kfraser@11618 336 if ( opt_xenheap_megabytes > 2048 )
kfraser@11618 337 opt_xenheap_megabytes = 2048;
kaf24@3338 338 xenheap_phys_end = opt_xenheap_megabytes << 20;
kaf24@3338 339
kaf24@3344 340 if ( mbi->flags & MBI_MEMMAP )
kaf24@3344 341 {
kaf24@3344 342 while ( bytes < mbi->mmap_length )
kaf24@3344 343 {
kaf24@3344 344 memory_map_t *map = __va(mbi->mmap_addr + bytes);
kaf24@8402 345
kaf24@8402 346 /*
kaf24@8403 347 * This is a gross workaround for a BIOS bug. Some bootloaders do
kaf24@8402 348 * not write e820 map entries into pre-zeroed memory. This is
kaf24@8402 349 * okay if the BIOS fills in all fields of the map entry, but
kaf24@8402 350 * some broken BIOSes do not bother to write the high word of
kaf24@8402 351 * the length field if the length is smaller than 4GB. We
kaf24@8402 352 * detect and fix this by flagging sections below 4GB that
kaf24@8403 353 * appear to be larger than 4GB in size.
kaf24@8402 354 */
kaf24@8403 355 if ( (map->base_addr_high == 0) && (map->length_high != 0) )
kaf24@8402 356 {
kaf24@8402 357 e820_warn = 1;
kaf24@8402 358 map->length_high = 0;
kaf24@8402 359 }
kaf24@8402 360
kaf24@3344 361 e820_raw[e820_raw_nr].addr =
kaf24@3344 362 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
kaf24@3344 363 e820_raw[e820_raw_nr].size =
kaf24@3344 364 ((u64)map->length_high << 32) | (u64)map->length_low;
kaf24@3344 365 e820_raw[e820_raw_nr].type =
kaf24@3346 366 (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
kaf24@3344 367 e820_raw_nr++;
kaf24@8402 368
kaf24@3344 369 bytes += map->size + 4;
kaf24@3344 370 }
kaf24@3344 371 }
kaf24@3344 372 else if ( mbi->flags & MBI_MEMLIMITS )
kaf24@3344 373 {
kaf24@3344 374 e820_raw[0].addr = 0;
kaf24@3344 375 e820_raw[0].size = mbi->mem_lower << 10;
kaf24@3344 376 e820_raw[0].type = E820_RAM;
kaf24@3354 377 e820_raw[1].addr = 0x100000;
kaf24@3354 378 e820_raw[1].size = mbi->mem_upper << 10;
kaf24@3354 379 e820_raw[1].type = E820_RAM;
kaf24@3344 380 e820_raw_nr = 2;
kaf24@3344 381 }
kaf24@3344 382 else
kaf24@3344 383 {
kaf24@3344 384 printk("FATAL ERROR: Bootloader provided no memory information.\n");
kaf24@3344 385 for ( ; ; ) ;
kaf24@3344 386 }
kaf24@3344 387
kaf24@8402 388 if ( e820_warn )
kaf24@8402 389 printk("WARNING: Buggy e820 map detected and fixed "
kaf24@8402 390 "(truncated length fields).\n");
kaf24@8402 391
kaf24@4950 392 max_page = init_e820(e820_raw, &e820_raw_nr);
kaf24@3338 393
kaf24@6111 394 modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
kaf24@3338 395
kaf24@6111 396 /* Find a large enough RAM extent to stash the DOM0 modules. */
kaf24@6111 397 for ( i = 0; ; i++ )
kaf24@3338 398 {
kaf24@6111 399 if ( i == e820.nr_map )
kaf24@6111 400 {
kaf24@6111 401 printk("Not enough memory to stash the DOM0 kernel image.\n");
kaf24@6111 402 for ( ; ; ) ;
kaf24@6111 403 }
kaf24@6134 404
kaf24@6134 405 if ( (e820.map[i].type == E820_RAM) &&
kaf24@6134 406 (e820.map[i].size >= modules_length) &&
kaf24@6134 407 ((e820.map[i].addr + e820.map[i].size) >=
kaf24@6134 408 (xenheap_phys_end + modules_length)) )
kaf24@6134 409 break;
kaf24@3338 410 }
kaf24@6111 411
kaf24@6134 412 /* Stash as near as possible to the beginning of the RAM extent. */
kaf24@6134 413 initial_images_start = e820.map[i].addr;
kaf24@6134 414 if ( initial_images_start < xenheap_phys_end )
kaf24@6134 415 initial_images_start = xenheap_phys_end;
kaf24@6134 416 initial_images_end = initial_images_start + modules_length;
kaf24@6134 417
kaf24@4950 418 #if defined(CONFIG_X86_32)
kaf24@3338 419 memmove((void *)initial_images_start, /* use low mapping */
kaf24@3338 420 (void *)mod[0].mod_start, /* use low mapping */
kaf24@3338 421 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
kaf24@4950 422 #elif defined(CONFIG_X86_64)
kaf24@3338 423 memmove(__va(initial_images_start),
kaf24@3338 424 __va(mod[0].mod_start),
kaf24@3338 425 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
kaf24@3338 426 #endif
kaf24@3338 427
kaf24@3354 428 /* Initialise boot-time allocator with all RAM situated after modules. */
kaf24@5003 429 xenheap_phys_start = init_boot_allocator(__pa(&_end));
kaf24@6111 430 nr_pages = 0;
kaf24@3354 431 for ( i = 0; i < e820.nr_map; i++ )
kaf24@3354 432 {
kaf24@3354 433 if ( e820.map[i].type != E820_RAM )
kaf24@3354 434 continue;
kaf24@6111 435
kaf24@3354 436 nr_pages += e820.map[i].size >> PAGE_SHIFT;
kaf24@6111 437
kaf24@6111 438 /* Initialise boot heap, skipping Xen heap and dom0 modules. */
kaf24@6111 439 s = e820.map[i].addr;
kaf24@6111 440 e = s + e820.map[i].size;
kaf24@6111 441 if ( s < xenheap_phys_end )
kaf24@6111 442 s = xenheap_phys_end;
kaf24@6111 443 if ( (s < initial_images_end) && (e > initial_images_start) )
kaf24@6111 444 s = initial_images_end;
kaf24@6111 445 init_boot_pages(s, e);
kaf24@6111 446
kaf24@5003 447 #if defined (CONFIG_X86_64)
kaf24@5003 448 /*
kaf24@5003 449 * x86/64 maps all registered RAM. Points to note:
kaf24@9783 450 * 1. The initial pagetable already maps low 1GB, so skip that.
kaf24@5003 451 * 2. We must map *only* RAM areas, taking care to avoid I/O holes.
kaf24@5003 452 * Failure to do this can cause coherency problems and deadlocks
kaf24@5003 453 * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
kaf24@5003 454 */
kaf24@5003 455 {
kaf24@5004 456 /* Calculate page-frame range, discarding partial frames. */
kaf24@5004 457 unsigned long start, end;
kaf24@9783 458 unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
kaf24@5005 459 start = PFN_UP(e820.map[i].addr);
kaf24@5005 460 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
kaf24@9800 461 /* Clip the range to exclude what the bootstrapper initialised. */
kaf24@9783 462 if ( end < init_mapped )
kaf24@5004 463 continue;
kaf24@9783 464 if ( start < init_mapped )
kaf24@9783 465 start = init_mapped;
kaf24@5004 466 /* Request the mapping. */
kaf24@5003 467 map_pages_to_xen(
kaf24@5004 468 PAGE_OFFSET + (start << PAGE_SHIFT),
kaf24@5004 469 start, end-start, PAGE_HYPERVISOR);
kaf24@5003 470 }
kaf24@5003 471 #endif
kaf24@3354 472 }
kaf24@3354 473
kaf24@5003 474 memguard_init();
kfraser@11241 475 percpu_guard_areas();
kaf24@4950 476
kaf24@3354 477 printk("System RAM: %luMB (%lukB)\n",
kaf24@3354 478 nr_pages >> (20 - PAGE_SHIFT),
kaf24@3354 479 nr_pages << (PAGE_SHIFT - 10));
kaf24@7220 480 total_pages = nr_pages;
kaf24@3354 481
kfraser@11296 482 /* Sanity check for unwanted bloat of certain hypercall structures. */
kfraser@11296 483 BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
kfraser@11296 484 sizeof(((struct xen_platform_op *)0)->u.pad));
kfraser@11296 485 BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
kfraser@11296 486 sizeof(((struct xen_domctl *)0)->u.pad));
kfraser@11296 487 BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
kfraser@11296 488 sizeof(((struct xen_sysctl *)0)->u.pad));
kaf24@7388 489
kaf24@9878 490 BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
kaf24@9878 491 BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
kaf24@9878 492 BUILD_BUG_ON(sizeof(vcpu_info_t) != 64);
kaf24@7744 493
kfraser@10492 494 /* Check definitions in public headers match internal defs. */
kaf24@9878 495 BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
kaf24@8521 496 #ifdef HYPERVISOR_VIRT_END
kaf24@9878 497 BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
kaf24@8521 498 #endif
kfraser@10492 499 BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
kfraser@10492 500 BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);
kaf24@8521 501
kaf24@3354 502 init_frametable();
kaf24@3338 503
kfraser@11971 504 acpi_boot_table_init();
kfraser@11971 505
kfraser@11971 506 acpi_numa_init();
kfraser@11971 507
kfraser@11971 508 numa_initmem_init(0, max_page);
kfraser@11971 509
kaf24@3354 510 end_boot_allocator();
kaf24@3354 511
kaf24@6111 512 /* Initialise the Xen heap, skipping RAM holes. */
kaf24@6111 513 nr_pages = 0;
kaf24@6111 514 for ( i = 0; i < e820.nr_map; i++ )
kaf24@6111 515 {
kaf24@6111 516 if ( e820.map[i].type != E820_RAM )
kaf24@6111 517 continue;
kaf24@6111 518
kaf24@6111 519 s = e820.map[i].addr;
kaf24@6111 520 e = s + e820.map[i].size;
kaf24@6111 521 if ( s < xenheap_phys_start )
kaf24@6111 522 s = xenheap_phys_start;
kaf24@6111 523 if ( e > xenheap_phys_end )
kaf24@6111 524 e = xenheap_phys_end;
kaf24@6111 525
kaf24@6111 526 if ( s < e )
kaf24@6111 527 {
kaf24@6111 528 nr_pages += (e - s) >> PAGE_SHIFT;
kaf24@6111 529 init_xenheap_pages(s, e);
kaf24@6111 530 }
kaf24@6111 531 }
kaf24@6111 532
kaf24@6111 533 printk("Xen heap: %luMB (%lukB)\n",
kaf24@6111 534 nr_pages >> (20 - PAGE_SHIFT),
kaf24@6111 535 nr_pages << (PAGE_SHIFT - 10));
kaf24@3338 536
kaf24@3594 537 early_boot = 0;
kaf24@3338 538
kaf24@8459 539 early_cpu_init();
kaf24@8459 540
kaf24@8459 541 paging_init();
kaf24@8459 542
kaf24@8459 543 /* Unmap the first page of CPU0's stack. */
kaf24@8459 544 memguard_guard_stack(cpu0_stack);
kaf24@8459 545
kaf24@8459 546 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
kaf24@8459 547
kaf24@8459 548 if ( opt_watchdog )
kaf24@8459 549 nmi_watchdog = NMI_LOCAL_APIC;
kaf24@8459 550
kaf24@8459 551 sort_exception_tables();
kaf24@8459 552
kaf24@8459 553 find_smp_config();
kaf24@8459 554
kaf24@8459 555 smp_alloc_memory();
kaf24@8459 556
kaf24@8459 557 dmi_scan_machine();
kaf24@8459 558
kaf24@8459 559 generic_apic_probe();
kaf24@8459 560
kaf24@8459 561 acpi_boot_init();
kaf24@8459 562
kfraser@11971 563 init_cpu_to_node();
kfraser@11971 564
kfraser@11241 565 if ( smp_found_config )
kaf24@8459 566 get_smp_config();
kaf24@8459 567
kaf24@8459 568 init_apic_mappings();
kaf24@8459 569
kaf24@8459 570 init_IRQ();
kaf24@8459 571
kfraser@11241 572 percpu_init_areas();
kfraser@11241 573
kfraser@11240 574 init_idle_domain();
kfraser@11240 575
kaf24@8459 576 trap_init();
kaf24@8459 577
kaf24@8586 578 timer_init();
kaf24@8459 579
kaf24@8459 580 early_time_init();
kaf24@8459 581
kaf24@8459 582 arch_init_memory();
kaf24@8459 583
kaf24@8459 584 identify_cpu(&boot_cpu_data);
kaf24@8459 585 if ( cpu_has_fxsr )
kaf24@8459 586 set_in_cr4(X86_CR4_OSFXSR);
kaf24@8459 587 if ( cpu_has_xmm )
kaf24@8459 588 set_in_cr4(X86_CR4_OSXMMEXCPT);
kaf24@8459 589
kaf24@8459 590 if ( opt_nosmp )
kaf24@8459 591 max_cpus = 0;
kaf24@8459 592
kaf24@8459 593 smp_prepare_cpus(max_cpus);
kaf24@8459 594
kaf24@8459 595 /*
kaf24@8459 596 * Initialise higher-level timer functions. We do this fairly late
kaf24@8459 597 * (post-SMP) because the time bases and scale factors need to be updated
kaf24@8459 598 * regularly, and SMP initialisation can cause a long delay with
kaf24@8459 599 * interrupts not yet enabled.
kaf24@8459 600 */
kaf24@8459 601 init_xen_time();
kaf24@8459 602
kaf24@8459 603 initialize_keytable();
kaf24@8459 604
kaf24@8459 605 serial_init_postirq();
kaf24@8459 606
kaf24@8459 607 BUG_ON(!local_irq_is_enabled());
kaf24@8459 608
kaf24@8459 609 for_each_present_cpu ( i )
kaf24@8459 610 {
kaf24@8459 611 if ( num_online_cpus() >= max_cpus )
kaf24@8459 612 break;
kaf24@8459 613 if ( !cpu_online(i) )
kaf24@8459 614 __cpu_up(i);
kfraser@11971 615
kfraser@11998 616 /* Set up cpu_to_node[]. */
kfraser@11971 617 srat_detect_node(i);
kfraser@11998 618 /* Set up node_to_cpumask based on cpu_to_node[]. */
kfraser@11971 619 numa_add_cpu(i);
kaf24@8459 620 }
kaf24@8459 621
kaf24@8459 622 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
kaf24@8459 623 smp_cpus_done(max_cpus);
kaf24@8459 624
kaf24@9818 625 percpu_free_unused_areas();
kaf24@9818 626
kaf24@9117 627 initialise_gdb(); /* could be moved earlier */
kaf24@9117 628
kaf24@8459 629 do_initcalls();
kaf24@8459 630
kaf24@8459 631 schedulers_start();
kaf24@8459 632
kaf24@8594 633 if ( opt_watchdog )
kaf24@8594 634 watchdog_enable();
kaf24@8459 635
kfraser@11881 636 /* Extract policy from multiboot. */
kfraser@11881 637 extract_acm_policy(mbi, &initrdidx, &_policy_start, &_policy_len);
kfraser@11881 638
kaf24@8459 639 /* initialize access control security module */
kfraser@11881 640 acm_init(_policy_start, _policy_len);
kaf24@8459 641
kaf24@8459 642 /* Create initial domain 0. */
kfraser@10655 643 dom0 = domain_create(0);
kfraser@10655 644 if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
kaf24@8459 645 panic("Error creating domain 0\n");
kaf24@8459 646
kaf24@8459 647 set_bit(_DOMF_privileged, &dom0->domain_flags);
kaf24@8459 648 /* post-create hooks sets security label */
kaf24@8459 649 acm_post_domain0_create(dom0->domain_id);
kaf24@8459 650
kaf24@8459 651 /* Grab the DOM0 command line. */
kaf24@8459 652 cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
kaf24@8459 653 if ( cmdline != NULL )
kaf24@8459 654 {
kaf24@8459 655 static char dom0_cmdline[MAX_GUEST_CMDLINE];
kaf24@8459 656
kaf24@8459 657 /* Skip past the image name and copy to a local buffer. */
kaf24@8459 658 while ( *cmdline == ' ' ) cmdline++;
kaf24@8459 659 if ( (cmdline = strchr(cmdline, ' ')) != NULL )
kaf24@8459 660 {
kaf24@8459 661 while ( *cmdline == ' ' ) cmdline++;
kaf24@8459 662 strcpy(dom0_cmdline, cmdline);
kaf24@8459 663 }
kaf24@8459 664
kaf24@8459 665 cmdline = dom0_cmdline;
kaf24@8459 666
kaf24@8459 667 /* Append any extra parameters. */
kaf24@8459 668 if ( skip_ioapic_setup && !strstr(cmdline, "noapic") )
kaf24@8459 669 strcat(cmdline, " noapic");
kaf24@8459 670 if ( acpi_skip_timer_override &&
kaf24@8459 671 !strstr(cmdline, "acpi_skip_timer_override") )
kaf24@8459 672 strcat(cmdline, " acpi_skip_timer_override");
kaf24@8459 673 if ( (strlen(acpi_param) != 0) && !strstr(cmdline, "acpi=") )
kaf24@8459 674 {
kaf24@8459 675 strcat(cmdline, " acpi=");
kaf24@8459 676 strcat(cmdline, acpi_param);
kaf24@8459 677 }
kaf24@8459 678 }
kaf24@8459 679
kaf24@8459 680 if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
kaf24@8459 681 {
kaf24@8459 682 _initrd_start = initial_images_start +
kaf24@8459 683 (mod[initrdidx].mod_start - mod[0].mod_start);
kaf24@8459 684 _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
kaf24@8459 685 }
kaf24@8459 686
kaf24@8459 687 /*
kaf24@8459 688 * We're going to setup domain0 using the module(s) that we stashed safely
kaf24@8459 689 * above our heap. The second module, if present, is an initrd ramdisk.
kaf24@8459 690 */
kaf24@8459 691 if ( construct_dom0(dom0,
kaf24@8459 692 initial_images_start,
kaf24@8459 693 mod[0].mod_end-mod[0].mod_start,
kaf24@8459 694 _initrd_start,
kaf24@8459 695 _initrd_len,
kaf24@8459 696 cmdline) != 0)
kaf24@8459 697 panic("Could not set up DOM0 guest OS\n");
kaf24@8459 698
kaf24@8459 699 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
kaf24@8459 700 scrub_heap_pages();
kaf24@8459 701
kaf24@8459 702 init_trace_bufs();
kaf24@8459 703
kaf24@10502 704 console_endboot();
kaf24@8459 705
kaf24@8459 706 /* Hide UART from DOM0 if we're using it */
kaf24@8459 707 serial_endboot();
kaf24@8459 708
kaf24@8459 709 domain_unpause_by_systemcontroller(dom0);
kaf24@8459 710
kaf24@8459 711 startup_cpu_idle_loop();
kaf24@8459 712 }
kaf24@8459 713
kaf24@6725 714 void arch_get_xen_caps(xen_capabilities_info_t info)
iap10@6721 715 {
kaf24@6725 716 char *p = info;
kfraser@10890 717 int major = xen_major_version();
kfraser@10890 718 int minor = xen_minor_version();
iap10@6721 719
kaf24@6725 720 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
kaf24@6725 721
kfraser@10890 722 p += sprintf(p, "xen-%d.%d-x86_32 ", major, minor);
kaf24@6725 723 if ( hvm_enabled )
kfraser@10890 724 p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
kaf24@6725 725
kaf24@6725 726 #elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
kaf24@6725 727
kfraser@10890 728 p += sprintf(p, "xen-%d.%d-x86_32p ", major, minor);
kaf24@6725 729 if ( hvm_enabled )
iap10@6721 730 {
kfraser@10890 731 p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
kfraser@10890 732 p += sprintf(p, "hvm-%d.%d-x86_32p ", major, minor);
iap10@6721 733 }
iap10@6721 734
kaf24@6725 735 #elif defined(CONFIG_X86_64)
iap10@6721 736
kfraser@10890 737 p += sprintf(p, "xen-%d.%d-x86_64 ", major, minor);
kaf24@6725 738 if ( hvm_enabled )
iap10@6721 739 {
kfraser@10890 740 p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
kfraser@10890 741 p += sprintf(p, "hvm-%d.%d-x86_32p ", major, minor);
kfraser@10890 742 p += sprintf(p, "hvm-%d.%d-x86_64 ", major, minor);
iap10@6721 743 }
kaf24@6725 744
kaf24@6725 745 #else
kaf24@6725 746
kaf24@6725 747 p++;
kaf24@6725 748
iap10@6721 749 #endif
kaf24@8246 750
kaf24@6725 751 *(p-1) = 0;
iap10@6721 752
shand@6734 753 BUG_ON((p - info) > sizeof(xen_capabilities_info_t));
iap10@6721 754 }
iap10@6721 755
kaf24@3914 756 /*
kaf24@3914 757 * Local variables:
kaf24@3914 758 * mode: C
kaf24@3914 759 * c-set-style: "BSD"
kaf24@3914 760 * c-basic-offset: 4
kaf24@3914 761 * tab-width: 4
kaf24@3914 762 * indent-tabs-mode: nil
kaf24@3988 763 * End:
kaf24@3914 764 */