ia64/xen-unstable

annotate xen/arch/x86/setup.c @ 11971:f312c2d01d8b

[XEN] Add basic NUMA/SRAT support to Xen from Linux 2.6.16.29.
Signed-off-by: Ryan Harper <ryanh@us.ibm.com>
author kfraser@localhost.localdomain
date Wed Oct 25 12:25:54 2006 +0100 (2006-10-25)
parents 29b02d929b7e
children 3db344ea8070
rev   line source
kaf24@1452 1 #include <xen/config.h>
kaf24@1452 2 #include <xen/init.h>
kaf24@1452 3 #include <xen/lib.h>
kaf24@1452 4 #include <xen/sched.h>
cl349@5247 5 #include <xen/domain.h>
kaf24@1452 6 #include <xen/serial.h>
kaf24@1506 7 #include <xen/softirq.h>
kaf24@1452 8 #include <xen/acpi.h>
kaf24@3338 9 #include <xen/console.h>
iap10@4287 10 #include <xen/serial.h>
kaf24@3338 11 #include <xen/trace.h>
kaf24@3338 12 #include <xen/multiboot.h>
kaf24@5356 13 #include <xen/domain_page.h>
kfraser@10890 14 #include <xen/version.h>
kaf24@9117 15 #include <xen/gdbstub.h>
kaf24@9818 16 #include <xen/percpu.h>
kfraser@11296 17 #include <xen/hypercall.h>
kfraser@11601 18 #include <xen/keyhandler.h>
kfraser@11971 19 #include <xen/numa.h>
iap10@6721 20 #include <public/version.h>
kaf24@1452 21 #include <asm/bitops.h>
kaf24@1452 22 #include <asm/smp.h>
kaf24@1452 23 #include <asm/processor.h>
kaf24@1452 24 #include <asm/mpspec.h>
kaf24@1452 25 #include <asm/apic.h>
kaf24@1452 26 #include <asm/desc.h>
kaf24@3338 27 #include <asm/shadow.h>
kaf24@3344 28 #include <asm/e820.h>
kfraser@11971 29 #include <asm/numa.h>
kaf24@5536 30 #include <acm/acm_hooks.h>
kaf24@3338 31
kaf24@5157 32 extern void dmi_scan_machine(void);
kaf24@5211 33 extern void generic_apic_probe(void);
kfraser@11971 34 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
kaf24@5157 35
kaf24@3338 36 /*
kaf24@3338 37 * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
kaf24@8726 38 * page_info table and allocation bitmap.
kaf24@3338 39 */
kaf24@3338 40 static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
kaf24@4950 41 #if defined(CONFIG_X86_64)
kaf24@3338 42 integer_param("xenheap_megabytes", opt_xenheap_megabytes);
kaf24@3354 43 #endif
kaf24@1452 44
kaf24@5146 45 /* opt_nosmp: If true, secondary processors are ignored. */
kaf24@5900 46 static int opt_nosmp = 0;
kaf24@5146 47 boolean_param("nosmp", opt_nosmp);
kaf24@5146 48
kaf24@5146 49 /* maxcpus: maximum number of CPUs to activate. */
kaf24@5146 50 static unsigned int max_cpus = NR_CPUS;
shand@11156 51 integer_param("maxcpus", max_cpus);
kaf24@5146 52
kaf24@3334 53 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
kaf24@3334 54 static int opt_watchdog = 0;
kaf24@3334 55 boolean_param("watchdog", opt_watchdog);
kaf24@3334 56
kaf24@4850 57 /* **** Linux config option: propagated to domain0. */
kaf24@4850 58 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
kaf24@4850 59 /* "acpi=force": Override the disable blacklist. */
kaf24@4850 60 /* "acpi=strict": Disables out-of-spec workarounds. */
kaf24@4850 61 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
kaf24@4850 62 /* "acpi=noirq": Disables ACPI interrupt routing. */
kaf24@4850 63 static void parse_acpi_param(char *s);
kaf24@4850 64 custom_param("acpi", parse_acpi_param);
kaf24@4850 65
kfraser@11971 66 extern int numa_setup(char *s);
kfraser@11971 67 custom_param("numa", numa_setup);
kfraser@11971 68
kaf24@4850 69 /* **** Linux config option: propagated to domain0. */
kaf24@4850 70 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
kaf24@4850 71 extern int acpi_skip_timer_override;
kaf24@4850 72 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
kaf24@4850 73
kaf24@4850 74 /* **** Linux config option: propagated to domain0. */
kaf24@4850 75 /* noapic: Disable IOAPIC setup. */
kaf24@4850 76 extern int skip_ioapic_setup;
kaf24@4850 77 boolean_param("noapic", skip_ioapic_setup);
kaf24@4850 78
kaf24@3594 79 int early_boot = 1;
kaf24@3594 80
kaf24@5146 81 cpumask_t cpu_present_map;
kaf24@5146 82
kaf24@5003 83 /* Limits of Xen heap, used to initialise the allocator. */
kaf24@5003 84 unsigned long xenheap_phys_start, xenheap_phys_end;
kaf24@3338 85
kaf24@2298 86 extern void arch_init_memory(void);
kaf24@1589 87 extern void init_IRQ(void);
kaf24@1589 88 extern void trap_init(void);
kaf24@5604 89 extern void early_time_init(void);
kaf24@5167 90 extern void early_cpu_init(void);
kaf24@1589 91
kaf24@8533 92 struct tss_struct init_tss[NR_CPUS];
kaf24@8533 93
kaf24@5011 94 extern unsigned long cpu0_stack[];
kaf24@5011 95
kaf24@5214 96 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
kaf24@1452 97
kaf24@5237 98 #if CONFIG_PAGING_LEVELS > 2
kaf24@1670 99 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
kaf24@1670 100 #else
kaf24@5593 101 unsigned long mmu_cr4_features = X86_CR4_PSE;
kaf24@1670 102 #endif
kaf24@1452 103 EXPORT_SYMBOL(mmu_cr4_features);
kaf24@1452 104
kaf24@4818 105 int acpi_disabled;
kaf24@1452 106
kaf24@4850 107 int acpi_force;
kaf24@4850 108 char acpi_param[10] = "";
kaf24@4850 109 static void parse_acpi_param(char *s)
kaf24@4850 110 {
kaf24@4850 111 /* Save the parameter so it can be propagated to domain0. */
kaf24@4850 112 strncpy(acpi_param, s, sizeof(acpi_param));
kaf24@4850 113 acpi_param[sizeof(acpi_param)-1] = '\0';
kaf24@4850 114
kaf24@4850 115 /* Interpret the parameter for use within Xen. */
kaf24@4850 116 if ( !strcmp(s, "off") )
kaf24@4850 117 {
kaf24@4850 118 disable_acpi();
kaf24@4850 119 }
kaf24@4850 120 else if ( !strcmp(s, "force") )
kaf24@4850 121 {
kaf24@4850 122 acpi_force = 1;
kaf24@4850 123 acpi_ht = 1;
kaf24@4850 124 acpi_disabled = 0;
kaf24@4850 125 }
kaf24@4850 126 else if ( !strcmp(s, "strict") )
kaf24@4850 127 {
kaf24@4850 128 acpi_strict = 1;
kaf24@4850 129 }
kaf24@4850 130 else if ( !strcmp(s, "ht") )
kaf24@4850 131 {
kaf24@4850 132 if ( !acpi_force )
kaf24@4850 133 disable_acpi();
kaf24@4850 134 acpi_ht = 1;
kaf24@4850 135 }
kaf24@4850 136 else if ( !strcmp(s, "noirq") )
kaf24@4850 137 {
kaf24@4850 138 acpi_noirq_set();
kaf24@4850 139 }
kaf24@4850 140 }
kaf24@4850 141
kaf24@1452 142 static void __init do_initcalls(void)
kaf24@1452 143 {
kaf24@1452 144 initcall_t *call;
kaf24@1452 145 for ( call = &__initcall_start; call < &__initcall_end; call++ )
kaf24@1452 146 (*call)();
kaf24@1452 147 }
kaf24@1452 148
kaf24@8459 149 #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
kaf24@8459 150
kaf24@8459 151 static struct e820entry e820_raw[E820MAX];
kaf24@8459 152
kaf24@9067 153 static unsigned long initial_images_start, initial_images_end;
kaf24@9067 154
kaf24@9067 155 unsigned long initial_images_nrpages(void)
kaf24@9067 156 {
kaf24@9067 157 unsigned long s = initial_images_start + PAGE_SIZE - 1;
kaf24@9067 158 unsigned long e = initial_images_end;
kaf24@9067 159 return ((e >> PAGE_SHIFT) - (s >> PAGE_SHIFT));
kaf24@9067 160 }
kaf24@9067 161
kaf24@9067 162 void discard_initial_images(void)
kaf24@9067 163 {
kaf24@9067 164 init_domheap_pages(initial_images_start, initial_images_end);
kaf24@9067 165 }
kaf24@9067 166
kaf24@9818 167 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
kaf24@9818 168
kfraser@11241 169 static void __init percpu_init_areas(void)
kaf24@9818 170 {
kaf24@9818 171 unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
kaf24@9818 172
kaf24@9818 173 BUG_ON(data_size > PERCPU_SIZE);
kaf24@9818 174
kfraser@11241 175 for_each_cpu ( i )
kfraser@11241 176 {
kfraser@11241 177 memguard_unguard_range(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@11241 178 1 << PERCPU_SHIFT);
kfraser@11241 179 if ( i != 0 )
kfraser@11241 180 memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@11241 181 __per_cpu_start,
kfraser@11241 182 data_size);
kfraser@11241 183 }
kaf24@9818 184 }
kaf24@9818 185
kfraser@11241 186 static void __init percpu_guard_areas(void)
kfraser@11241 187 {
kfraser@11241 188 memguard_guard_range(__per_cpu_start, __per_cpu_end - __per_cpu_start);
kfraser@11241 189 }
kfraser@11241 190
kfraser@11241 191 static void __init percpu_free_unused_areas(void)
kaf24@9818 192 {
kaf24@9818 193 unsigned int i, first_unused;
kaf24@9818 194
kaf24@9818 195 /* Find first unused CPU number. */
kaf24@9818 196 for ( i = 0; i < NR_CPUS; i++ )
kaf24@9818 197 if ( !cpu_online(i) )
kaf24@9818 198 break;
kaf24@9818 199 first_unused = i;
kaf24@9818 200
kaf24@9818 201 /* Check that there are no holes in cpu_online_map. */
kaf24@9818 202 for ( ; i < NR_CPUS; i++ )
kaf24@9818 203 BUG_ON(cpu_online(i));
kaf24@9818 204
kfraser@11241 205 #ifndef MEMORY_GUARD
kaf24@9818 206 init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
kaf24@9818 207 __pa(__per_cpu_end));
kfraser@11241 208 #endif
kaf24@9818 209 }
kaf24@9818 210
kfraser@11881 211 /* Fetch acm policy module from multiboot modules. */
kfraser@11881 212 static void extract_acm_policy(
kfraser@11881 213 multiboot_info_t *mbi,
kfraser@11881 214 unsigned int *initrdidx,
kfraser@11881 215 char **_policy_start,
kfraser@11881 216 unsigned long *_policy_len)
kfraser@11881 217 {
kfraser@11881 218 int i;
kfraser@11881 219 module_t *mod = (module_t *)__va(mbi->mods_addr);
kfraser@11881 220 unsigned long start, policy_len;
kfraser@11881 221 char *policy_start;
kfraser@11881 222
kfraser@11881 223 /*
kfraser@11881 224 * Try all modules and see whichever could be the binary policy.
kfraser@11881 225 * Adjust the initrdidx if module[1] is the binary policy.
kfraser@11881 226 */
kfraser@11881 227 for ( i = mbi->mods_count-1; i >= 1; i-- )
kfraser@11881 228 {
kfraser@11881 229 start = initial_images_start + (mod[i].mod_start-mod[0].mod_start);
kfraser@11881 230 #if defined(__i386__)
kfraser@11881 231 policy_start = (char *)start;
kfraser@11881 232 #elif defined(__x86_64__)
kfraser@11881 233 policy_start = __va(start);
kfraser@11881 234 #endif
kfraser@11881 235 policy_len = mod[i].mod_end - mod[i].mod_start;
kfraser@11881 236 if ( acm_is_policy(policy_start, policy_len) )
kfraser@11881 237 {
kfraser@11881 238 printk("Policy len 0x%lx, start at %p - module %d.\n",
kfraser@11881 239 policy_len, policy_start, i);
kfraser@11881 240 *_policy_start = policy_start;
kfraser@11881 241 *_policy_len = policy_len;
kfraser@11881 242 if ( i == 1 )
kfraser@11881 243 *initrdidx = (mbi->mods_count > 2) ? 2 : 0;
kfraser@11881 244 break;
kfraser@11881 245 }
kfraser@11881 246 }
kfraser@11881 247 }
kfraser@11881 248
kfraser@11241 249 static void __init init_idle_domain(void)
kfraser@11240 250 {
kfraser@11240 251 struct domain *idle_domain;
kfraser@11240 252
kfraser@11240 253 /* Domain creation requires that scheduler structures are initialised. */
kfraser@11240 254 scheduler_init();
kfraser@11240 255
kfraser@11240 256 idle_domain = domain_create(IDLE_DOMAIN_ID);
kfraser@11240 257 if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
kfraser@11240 258 BUG();
kfraser@11240 259
kfraser@11240 260 set_current(idle_domain->vcpu[0]);
kfraser@11240 261 idle_vcpu[0] = this_cpu(curr_vcpu) = current;
kfraser@11240 262
kfraser@11240 263 setup_idle_pagetable();
kfraser@11240 264 }
kfraser@11240 265
kfraser@11971 266 static void srat_detect_node(int cpu)
kfraser@11971 267 {
kfraser@11971 268 unsigned node;
kfraser@11971 269 u8 apicid = x86_cpu_to_apicid[cpu];
kfraser@11971 270
kfraser@11971 271 node = apicid_to_node[apicid];
kfraser@11971 272 if (node == NUMA_NO_NODE)
kfraser@11971 273 node = 0;
kfraser@11971 274 numa_set_node(cpu, node);
kfraser@11971 275
kfraser@11971 276 if (acpi_numa > 0)
kfraser@11971 277 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
kfraser@11971 278 }
kfraser@11971 279
kaf24@8463 280 void __init __start_xen(multiboot_info_t *mbi)
kaf24@1452 281 {
kaf24@9823 282 char __cmdline[] = "", *cmdline = __cmdline;
kaf24@8457 283 unsigned long _initrd_start = 0, _initrd_len = 0;
kaf24@8457 284 unsigned int initrdidx = 1;
kfraser@11881 285 char *_policy_start = NULL;
kfraser@11881 286 unsigned long _policy_len = 0;
kaf24@8457 287 module_t *mod = (module_t *)__va(mbi->mods_addr);
kaf24@6111 288 unsigned long nr_pages, modules_length;
kaf24@8726 289 paddr_t s, e;
kaf24@8402 290 int i, e820_warn = 0, e820_raw_nr = 0, bytes = 0;
kaf24@5776 291 struct ns16550_defaults ns16550 = {
kaf24@5776 292 .data_bits = 8,
kaf24@5776 293 .parity = 'n',
kaf24@5776 294 .stop_bits = 1
kaf24@5776 295 };
kaf24@3338 296
kaf24@3338 297 /* Parse the command-line options. */
kaf24@3344 298 if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
kaf24@9823 299 cmdline = __va(mbi->cmdline);
kaf24@9823 300 cmdline_parse(cmdline);
kaf24@3338 301
kaf24@8534 302 set_current((struct vcpu *)0xfffff000); /* debug sanity */
kfraser@11240 303 idle_vcpu[0] = current;
kaf24@8534 304 set_processor_id(0); /* needed early, for smp_processor_id() */
kaf24@3338 305
kaf24@5146 306 smp_prepare_boot_cpu();
kaf24@5146 307
kaf24@3338 308 /* We initialise the serial devices very early so we can get debugging. */
kaf24@5776 309 ns16550.io_base = 0x3f8;
kaf24@5776 310 ns16550.irq = 4;
kaf24@5776 311 ns16550_init(0, &ns16550);
kaf24@5776 312 ns16550.io_base = 0x2f8;
kaf24@5776 313 ns16550.irq = 3;
kaf24@5776 314 ns16550_init(1, &ns16550);
kaf24@5195 315 serial_init_preirq();
kaf24@3338 316
kaf24@3338 317 init_console();
kaf24@3338 318
kfraser@11947 319 printk("Command line: %s\n", cmdline);
kaf24@9823 320
kaf24@3344 321 /* Check that we have at least one Multiboot module. */
kaf24@3344 322 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
kaf24@3338 323 {
kaf24@5542 324 printk("FATAL ERROR: dom0 kernel not specified."
kaf24@5542 325 " Check bootloader configuration.\n");
kaf24@5011 326 EARLY_FAIL();
kaf24@5011 327 }
kaf24@5011 328
kaf24@5011 329 if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
kaf24@5011 330 {
kaf24@5011 331 printk("FATAL ERROR: Misaligned CPU0 stack.\n");
kaf24@5011 332 EARLY_FAIL();
kaf24@3338 333 }
kaf24@3338 334
kfraser@11618 335 /*
kfraser@11618 336 * Since there are some stubs getting built on the stacks which use
kfraser@11618 337 * direct calls/jumps, the heap must be confined to the lower 2G so
kfraser@11618 338 * that those branches can reach their targets.
kfraser@11618 339 */
kfraser@11618 340 if ( opt_xenheap_megabytes > 2048 )
kfraser@11618 341 opt_xenheap_megabytes = 2048;
kaf24@3338 342 xenheap_phys_end = opt_xenheap_megabytes << 20;
kaf24@3338 343
kaf24@3344 344 if ( mbi->flags & MBI_MEMMAP )
kaf24@3344 345 {
kaf24@3344 346 while ( bytes < mbi->mmap_length )
kaf24@3344 347 {
kaf24@3344 348 memory_map_t *map = __va(mbi->mmap_addr + bytes);
kaf24@8402 349
kaf24@8402 350 /*
kaf24@8403 351 * This is a gross workaround for a BIOS bug. Some bootloaders do
kaf24@8402 352 * not write e820 map entries into pre-zeroed memory. This is
kaf24@8402 353 * okay if the BIOS fills in all fields of the map entry, but
kaf24@8402 354 * some broken BIOSes do not bother to write the high word of
kaf24@8402 355 * the length field if the length is smaller than 4GB. We
kaf24@8402 356 * detect and fix this by flagging sections below 4GB that
kaf24@8403 357 * appear to be larger than 4GB in size.
kaf24@8402 358 */
kaf24@8403 359 if ( (map->base_addr_high == 0) && (map->length_high != 0) )
kaf24@8402 360 {
kaf24@8402 361 e820_warn = 1;
kaf24@8402 362 map->length_high = 0;
kaf24@8402 363 }
kaf24@8402 364
kaf24@3344 365 e820_raw[e820_raw_nr].addr =
kaf24@3344 366 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
kaf24@3344 367 e820_raw[e820_raw_nr].size =
kaf24@3344 368 ((u64)map->length_high << 32) | (u64)map->length_low;
kaf24@3344 369 e820_raw[e820_raw_nr].type =
kaf24@3346 370 (map->type > E820_SHARED_PAGE) ? E820_RESERVED : map->type;
kaf24@3344 371 e820_raw_nr++;
kaf24@8402 372
kaf24@3344 373 bytes += map->size + 4;
kaf24@3344 374 }
kaf24@3344 375 }
kaf24@3344 376 else if ( mbi->flags & MBI_MEMLIMITS )
kaf24@3344 377 {
kaf24@3344 378 e820_raw[0].addr = 0;
kaf24@3344 379 e820_raw[0].size = mbi->mem_lower << 10;
kaf24@3344 380 e820_raw[0].type = E820_RAM;
kaf24@3354 381 e820_raw[1].addr = 0x100000;
kaf24@3354 382 e820_raw[1].size = mbi->mem_upper << 10;
kaf24@3354 383 e820_raw[1].type = E820_RAM;
kaf24@3344 384 e820_raw_nr = 2;
kaf24@3344 385 }
kaf24@3344 386 else
kaf24@3344 387 {
kaf24@3344 388 printk("FATAL ERROR: Bootloader provided no memory information.\n");
kaf24@3344 389 for ( ; ; ) ;
kaf24@3344 390 }
kaf24@3344 391
kaf24@8402 392 if ( e820_warn )
kaf24@8402 393 printk("WARNING: Buggy e820 map detected and fixed "
kaf24@8402 394 "(truncated length fields).\n");
kaf24@8402 395
kaf24@4950 396 max_page = init_e820(e820_raw, &e820_raw_nr);
kaf24@3338 397
kaf24@6111 398 modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
kaf24@3338 399
kaf24@6111 400 /* Find a large enough RAM extent to stash the DOM0 modules. */
kaf24@6111 401 for ( i = 0; ; i++ )
kaf24@3338 402 {
kaf24@6111 403 if ( i == e820.nr_map )
kaf24@6111 404 {
kaf24@6111 405 printk("Not enough memory to stash the DOM0 kernel image.\n");
kaf24@6111 406 for ( ; ; ) ;
kaf24@6111 407 }
kaf24@6134 408
kaf24@6134 409 if ( (e820.map[i].type == E820_RAM) &&
kaf24@6134 410 (e820.map[i].size >= modules_length) &&
kaf24@6134 411 ((e820.map[i].addr + e820.map[i].size) >=
kaf24@6134 412 (xenheap_phys_end + modules_length)) )
kaf24@6134 413 break;
kaf24@3338 414 }
kaf24@6111 415
kaf24@6134 416 /* Stash as near as possible to the beginning of the RAM extent. */
kaf24@6134 417 initial_images_start = e820.map[i].addr;
kaf24@6134 418 if ( initial_images_start < xenheap_phys_end )
kaf24@6134 419 initial_images_start = xenheap_phys_end;
kaf24@6134 420 initial_images_end = initial_images_start + modules_length;
kaf24@6134 421
kaf24@4950 422 #if defined(CONFIG_X86_32)
kaf24@3338 423 memmove((void *)initial_images_start, /* use low mapping */
kaf24@3338 424 (void *)mod[0].mod_start, /* use low mapping */
kaf24@3338 425 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
kaf24@4950 426 #elif defined(CONFIG_X86_64)
kaf24@3338 427 memmove(__va(initial_images_start),
kaf24@3338 428 __va(mod[0].mod_start),
kaf24@3338 429 mod[mbi->mods_count-1].mod_end - mod[0].mod_start);
kaf24@3338 430 #endif
kaf24@3338 431
kaf24@3354 432 /* Initialise boot-time allocator with all RAM situated after modules. */
kaf24@5003 433 xenheap_phys_start = init_boot_allocator(__pa(&_end));
kaf24@6111 434 nr_pages = 0;
kaf24@3354 435 for ( i = 0; i < e820.nr_map; i++ )
kaf24@3354 436 {
kaf24@3354 437 if ( e820.map[i].type != E820_RAM )
kaf24@3354 438 continue;
kaf24@6111 439
kaf24@3354 440 nr_pages += e820.map[i].size >> PAGE_SHIFT;
kaf24@6111 441
kaf24@6111 442 /* Initialise boot heap, skipping Xen heap and dom0 modules. */
kaf24@6111 443 s = e820.map[i].addr;
kaf24@6111 444 e = s + e820.map[i].size;
kaf24@6111 445 if ( s < xenheap_phys_end )
kaf24@6111 446 s = xenheap_phys_end;
kaf24@6111 447 if ( (s < initial_images_end) && (e > initial_images_start) )
kaf24@6111 448 s = initial_images_end;
kaf24@6111 449 init_boot_pages(s, e);
kaf24@6111 450
kaf24@5003 451 #if defined (CONFIG_X86_64)
kaf24@5003 452 /*
kaf24@5003 453 * x86/64 maps all registered RAM. Points to note:
kaf24@9783 454 * 1. The initial pagetable already maps low 1GB, so skip that.
kaf24@5003 455 * 2. We must map *only* RAM areas, taking care to avoid I/O holes.
kaf24@5003 456 * Failure to do this can cause coherency problems and deadlocks
kaf24@5003 457 * due to cache-attribute mismatches (e.g., AMD/AGP Linux bug).
kaf24@5003 458 */
kaf24@5003 459 {
kaf24@5004 460 /* Calculate page-frame range, discarding partial frames. */
kaf24@5004 461 unsigned long start, end;
kaf24@9783 462 unsigned long init_mapped = 1UL << (30 - PAGE_SHIFT); /* 1GB */
kaf24@5005 463 start = PFN_UP(e820.map[i].addr);
kaf24@5005 464 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
kaf24@9800 465 /* Clip the range to exclude what the bootstrapper initialised. */
kaf24@9783 466 if ( end < init_mapped )
kaf24@5004 467 continue;
kaf24@9783 468 if ( start < init_mapped )
kaf24@9783 469 start = init_mapped;
kaf24@5004 470 /* Request the mapping. */
kaf24@5003 471 map_pages_to_xen(
kaf24@5004 472 PAGE_OFFSET + (start << PAGE_SHIFT),
kaf24@5004 473 start, end-start, PAGE_HYPERVISOR);
kaf24@5003 474 }
kaf24@5003 475 #endif
kaf24@3354 476 }
kaf24@3354 477
kaf24@5003 478 memguard_init();
kfraser@11241 479 percpu_guard_areas();
kaf24@4950 480
kaf24@3354 481 printk("System RAM: %luMB (%lukB)\n",
kaf24@3354 482 nr_pages >> (20 - PAGE_SHIFT),
kaf24@3354 483 nr_pages << (PAGE_SHIFT - 10));
kaf24@7220 484 total_pages = nr_pages;
kaf24@3354 485
kfraser@11296 486 /* Sanity check for unwanted bloat of certain hypercall structures. */
kfraser@11296 487 BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
kfraser@11296 488 sizeof(((struct xen_platform_op *)0)->u.pad));
kfraser@11296 489 BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
kfraser@11296 490 sizeof(((struct xen_domctl *)0)->u.pad));
kfraser@11296 491 BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
kfraser@11296 492 sizeof(((struct xen_sysctl *)0)->u.pad));
kaf24@7388 493
kaf24@9878 494 BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
kaf24@9878 495 BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
kaf24@9878 496 BUILD_BUG_ON(sizeof(vcpu_info_t) != 64);
kaf24@7744 497
kfraser@10492 498 /* Check definitions in public headers match internal defs. */
kaf24@9878 499 BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
kaf24@8521 500 #ifdef HYPERVISOR_VIRT_END
kaf24@9878 501 BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
kaf24@8521 502 #endif
kfraser@10492 503 BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
kfraser@10492 504 BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);
kaf24@8521 505
kaf24@3354 506 init_frametable();
kaf24@3338 507
kfraser@11971 508 acpi_boot_table_init();
kfraser@11971 509
kfraser@11971 510 acpi_numa_init();
kfraser@11971 511
kfraser@11971 512 numa_initmem_init(0, max_page);
kfraser@11971 513
kaf24@3354 514 end_boot_allocator();
kaf24@3354 515
kaf24@6111 516 /* Initialise the Xen heap, skipping RAM holes. */
kaf24@6111 517 nr_pages = 0;
kaf24@6111 518 for ( i = 0; i < e820.nr_map; i++ )
kaf24@6111 519 {
kaf24@6111 520 if ( e820.map[i].type != E820_RAM )
kaf24@6111 521 continue;
kaf24@6111 522
kaf24@6111 523 s = e820.map[i].addr;
kaf24@6111 524 e = s + e820.map[i].size;
kaf24@6111 525 if ( s < xenheap_phys_start )
kaf24@6111 526 s = xenheap_phys_start;
kaf24@6111 527 if ( e > xenheap_phys_end )
kaf24@6111 528 e = xenheap_phys_end;
kaf24@6111 529
kaf24@6111 530 if ( s < e )
kaf24@6111 531 {
kaf24@6111 532 nr_pages += (e - s) >> PAGE_SHIFT;
kaf24@6111 533 init_xenheap_pages(s, e);
kaf24@6111 534 }
kaf24@6111 535 }
kaf24@6111 536
kaf24@6111 537 printk("Xen heap: %luMB (%lukB)\n",
kaf24@6111 538 nr_pages >> (20 - PAGE_SHIFT),
kaf24@6111 539 nr_pages << (PAGE_SHIFT - 10));
kaf24@3338 540
kaf24@3594 541 early_boot = 0;
kaf24@3338 542
kaf24@8459 543 early_cpu_init();
kaf24@8459 544
kaf24@8459 545 paging_init();
kaf24@8459 546
kaf24@8459 547 /* Unmap the first page of CPU0's stack. */
kaf24@8459 548 memguard_guard_stack(cpu0_stack);
kaf24@8459 549
kaf24@8459 550 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
kaf24@8459 551
kaf24@8459 552 if ( opt_watchdog )
kaf24@8459 553 nmi_watchdog = NMI_LOCAL_APIC;
kaf24@8459 554
kaf24@8459 555 sort_exception_tables();
kaf24@8459 556
kaf24@8459 557 find_smp_config();
kaf24@8459 558
kaf24@8459 559 smp_alloc_memory();
kaf24@8459 560
kaf24@8459 561 dmi_scan_machine();
kaf24@8459 562
kaf24@8459 563 generic_apic_probe();
kaf24@8459 564
kaf24@8459 565 acpi_boot_init();
kaf24@8459 566
kfraser@11971 567 init_cpu_to_node();
kfraser@11971 568
kfraser@11241 569 if ( smp_found_config )
kaf24@8459 570 get_smp_config();
kaf24@8459 571
kaf24@8459 572 init_apic_mappings();
kaf24@8459 573
kaf24@8459 574 init_IRQ();
kaf24@8459 575
kfraser@11241 576 percpu_init_areas();
kfraser@11241 577
kfraser@11240 578 init_idle_domain();
kfraser@11240 579
kaf24@8459 580 trap_init();
kaf24@8459 581
kaf24@8586 582 timer_init();
kaf24@8459 583
kaf24@8459 584 early_time_init();
kaf24@8459 585
kaf24@8459 586 arch_init_memory();
kaf24@8459 587
kaf24@8459 588 identify_cpu(&boot_cpu_data);
kaf24@8459 589 if ( cpu_has_fxsr )
kaf24@8459 590 set_in_cr4(X86_CR4_OSFXSR);
kaf24@8459 591 if ( cpu_has_xmm )
kaf24@8459 592 set_in_cr4(X86_CR4_OSXMMEXCPT);
kaf24@8459 593
kaf24@8459 594 if ( opt_nosmp )
kaf24@8459 595 max_cpus = 0;
kaf24@8459 596
kaf24@8459 597 smp_prepare_cpus(max_cpus);
kaf24@8459 598
kaf24@8459 599 /*
kaf24@8459 600 * Initialise higher-level timer functions. We do this fairly late
kaf24@8459 601 * (post-SMP) because the time bases and scale factors need to be updated
kaf24@8459 602 * regularly, and SMP initialisation can cause a long delay with
kaf24@8459 603 * interrupts not yet enabled.
kaf24@8459 604 */
kaf24@8459 605 init_xen_time();
kaf24@8459 606
kaf24@8459 607 initialize_keytable();
kaf24@8459 608
kaf24@8459 609 serial_init_postirq();
kaf24@8459 610
kaf24@8459 611 BUG_ON(!local_irq_is_enabled());
kaf24@8459 612
kaf24@8459 613 for_each_present_cpu ( i )
kaf24@8459 614 {
kaf24@8459 615 if ( num_online_cpus() >= max_cpus )
kaf24@8459 616 break;
kaf24@8459 617 if ( !cpu_online(i) )
kaf24@8459 618 __cpu_up(i);
kfraser@11971 619
kfraser@11971 620 /* setup cpu_to_node[] */
kfraser@11971 621 srat_detect_node(i);
kfraser@11971 622 /* setup node_to_cpumask based on cpu_to_node[] */
kfraser@11971 623 numa_add_cpu(i);
kaf24@8459 624 }
kaf24@8459 625
kaf24@8459 626 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
kaf24@8459 627 smp_cpus_done(max_cpus);
kaf24@8459 628
kaf24@9818 629 percpu_free_unused_areas();
kaf24@9818 630
kaf24@9117 631 initialise_gdb(); /* could be moved earlier */
kaf24@9117 632
kaf24@8459 633 do_initcalls();
kaf24@8459 634
kaf24@8459 635 schedulers_start();
kaf24@8459 636
kaf24@8594 637 if ( opt_watchdog )
kaf24@8594 638 watchdog_enable();
kaf24@8459 639
kfraser@11881 640 /* Extract policy from multiboot. */
kfraser@11881 641 extract_acm_policy(mbi, &initrdidx, &_policy_start, &_policy_len);
kfraser@11881 642
kaf24@8459 643 /* initialize access control security module */
kfraser@11881 644 acm_init(_policy_start, _policy_len);
kaf24@8459 645
kaf24@8459 646 /* Create initial domain 0. */
kfraser@10655 647 dom0 = domain_create(0);
kfraser@10655 648 if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
kaf24@8459 649 panic("Error creating domain 0\n");
kaf24@8459 650
kaf24@8459 651 set_bit(_DOMF_privileged, &dom0->domain_flags);
kaf24@8459 652 /* post-create hooks sets security label */
kaf24@8459 653 acm_post_domain0_create(dom0->domain_id);
kaf24@8459 654
kaf24@8459 655 /* Grab the DOM0 command line. */
kaf24@8459 656 cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
kaf24@8459 657 if ( cmdline != NULL )
kaf24@8459 658 {
kaf24@8459 659 static char dom0_cmdline[MAX_GUEST_CMDLINE];
kaf24@8459 660
kaf24@8459 661 /* Skip past the image name and copy to a local buffer. */
kaf24@8459 662 while ( *cmdline == ' ' ) cmdline++;
kaf24@8459 663 if ( (cmdline = strchr(cmdline, ' ')) != NULL )
kaf24@8459 664 {
kaf24@8459 665 while ( *cmdline == ' ' ) cmdline++;
kaf24@8459 666 strcpy(dom0_cmdline, cmdline);
kaf24@8459 667 }
kaf24@8459 668
kaf24@8459 669 cmdline = dom0_cmdline;
kaf24@8459 670
kaf24@8459 671 /* Append any extra parameters. */
kaf24@8459 672 if ( skip_ioapic_setup && !strstr(cmdline, "noapic") )
kaf24@8459 673 strcat(cmdline, " noapic");
kaf24@8459 674 if ( acpi_skip_timer_override &&
kaf24@8459 675 !strstr(cmdline, "acpi_skip_timer_override") )
kaf24@8459 676 strcat(cmdline, " acpi_skip_timer_override");
kaf24@8459 677 if ( (strlen(acpi_param) != 0) && !strstr(cmdline, "acpi=") )
kaf24@8459 678 {
kaf24@8459 679 strcat(cmdline, " acpi=");
kaf24@8459 680 strcat(cmdline, acpi_param);
kaf24@8459 681 }
kaf24@8459 682 }
kaf24@8459 683
kaf24@8459 684 if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
kaf24@8459 685 {
kaf24@8459 686 _initrd_start = initial_images_start +
kaf24@8459 687 (mod[initrdidx].mod_start - mod[0].mod_start);
kaf24@8459 688 _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
kaf24@8459 689 }
kaf24@8459 690
kaf24@8459 691 /*
kaf24@8459 692 * We're going to setup domain0 using the module(s) that we stashed safely
kaf24@8459 693 * above our heap. The second module, if present, is an initrd ramdisk.
kaf24@8459 694 */
kaf24@8459 695 if ( construct_dom0(dom0,
kaf24@8459 696 initial_images_start,
kaf24@8459 697 mod[0].mod_end-mod[0].mod_start,
kaf24@8459 698 _initrd_start,
kaf24@8459 699 _initrd_len,
kaf24@8459 700 cmdline) != 0)
kaf24@8459 701 panic("Could not set up DOM0 guest OS\n");
kaf24@8459 702
kaf24@8459 703 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
kaf24@8459 704 scrub_heap_pages();
kaf24@8459 705
kaf24@8459 706 init_trace_bufs();
kaf24@8459 707
kaf24@10502 708 console_endboot();
kaf24@8459 709
kaf24@8459 710 /* Hide UART from DOM0 if we're using it */
kaf24@8459 711 serial_endboot();
kaf24@8459 712
kaf24@8459 713 domain_unpause_by_systemcontroller(dom0);
kaf24@8459 714
kaf24@8459 715 startup_cpu_idle_loop();
kaf24@8459 716 }
kaf24@8459 717
kaf24@6725 718 void arch_get_xen_caps(xen_capabilities_info_t info)
iap10@6721 719 {
kaf24@6725 720 char *p = info;
kfraser@10890 721 int major = xen_major_version();
kfraser@10890 722 int minor = xen_minor_version();
iap10@6721 723
kaf24@6725 724 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
kaf24@6725 725
kfraser@10890 726 p += sprintf(p, "xen-%d.%d-x86_32 ", major, minor);
kaf24@6725 727 if ( hvm_enabled )
kfraser@10890 728 p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
kaf24@6725 729
kaf24@6725 730 #elif defined(CONFIG_X86_32) && defined(CONFIG_X86_PAE)
kaf24@6725 731
kfraser@10890 732 p += sprintf(p, "xen-%d.%d-x86_32p ", major, minor);
kaf24@6725 733 if ( hvm_enabled )
iap10@6721 734 {
kfraser@10890 735 p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
kfraser@10890 736 p += sprintf(p, "hvm-%d.%d-x86_32p ", major, minor);
iap10@6721 737 }
iap10@6721 738
kaf24@6725 739 #elif defined(CONFIG_X86_64)
iap10@6721 740
kfraser@10890 741 p += sprintf(p, "xen-%d.%d-x86_64 ", major, minor);
kaf24@6725 742 if ( hvm_enabled )
iap10@6721 743 {
kfraser@10890 744 p += sprintf(p, "hvm-%d.%d-x86_32 ", major, minor);
kfraser@10890 745 p += sprintf(p, "hvm-%d.%d-x86_32p ", major, minor);
kfraser@10890 746 p += sprintf(p, "hvm-%d.%d-x86_64 ", major, minor);
iap10@6721 747 }
kaf24@6725 748
kaf24@6725 749 #else
kaf24@6725 750
kaf24@6725 751 p++;
kaf24@6725 752
iap10@6721 753 #endif
kaf24@8246 754
kaf24@6725 755 *(p-1) = 0;
iap10@6721 756
shand@6734 757 BUG_ON((p - info) > sizeof(xen_capabilities_info_t));
iap10@6721 758 }
iap10@6721 759
kaf24@3914 760 /*
kaf24@3914 761 * Local variables:
kaf24@3914 762 * mode: C
kaf24@3914 763 * c-set-style: "BSD"
kaf24@3914 764 * c-basic-offset: 4
kaf24@3914 765 * tab-width: 4
kaf24@3914 766 * indent-tabs-mode: nil
kaf24@3988 767 * End:
kaf24@3914 768 */