ia64/xen-unstable

annotate xen/arch/x86/setup.c @ 19788:2f9e1348aa98

x86_64: allow more vCPU-s per guest

Since the shared info layout is fixed, guests are required to use
VCPUOP_register_vcpu_info prior to booting any vCPU beyond the
traditional limit of 32.

MAX_VIRT_CPUS, being an implementation detail of the hypervisor, is no
longer being exposed in the public headers.

The tools changes are clearly incomplete (and done only so things would
build again), and the current state of the tools (using scalar
variables all over the place to represent vCPU bitmaps) very likely
doesn't permit booting DomU-s with more than the traditional number of
vCPU-s. Testing of the extended functionality was done with Dom0 (96
vCPU-s, as well as 128 vCPU-s out of which the kernel elected - by way
of a simple kernel side patch - to use only some, resulting in a sparse
bitmap).

ia64 changes only to make things build, and build-tested only (and the
tools part only as far as the build would go without encountering
unrelated problems in the blktap code).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:14:16 2009 +0100 (2009-06-18)
parents f210a633571c
children
rev   line source
kaf24@1452 1 #include <xen/config.h>
kaf24@1452 2 #include <xen/init.h>
kaf24@1452 3 #include <xen/lib.h>
kaf24@1452 4 #include <xen/sched.h>
cl349@5247 5 #include <xen/domain.h>
kaf24@1452 6 #include <xen/serial.h>
kaf24@1506 7 #include <xen/softirq.h>
kaf24@1452 8 #include <xen/acpi.h>
kaf24@3338 9 #include <xen/console.h>
iap10@4287 10 #include <xen/serial.h>
kaf24@3338 11 #include <xen/trace.h>
kaf24@3338 12 #include <xen/multiboot.h>
kaf24@5356 13 #include <xen/domain_page.h>
kfraser@10890 14 #include <xen/version.h>
kaf24@9117 15 #include <xen/gdbstub.h>
kaf24@9818 16 #include <xen/percpu.h>
kfraser@11296 17 #include <xen/hypercall.h>
kfraser@11601 18 #include <xen/keyhandler.h>
kfraser@11971 19 #include <xen/numa.h>
kaf24@13662 20 #include <xen/rcupdate.h>
keir@15298 21 #include <xen/vga.h>
keir@15988 22 #include <xen/dmi.h>
iap10@6721 23 #include <public/version.h>
ack@13291 24 #ifdef CONFIG_COMPAT
ack@13291 25 #include <compat/platform.h>
ack@13291 26 #include <compat/xen.h>
ack@13291 27 #endif
kaf24@1452 28 #include <asm/bitops.h>
kaf24@1452 29 #include <asm/smp.h>
kaf24@1452 30 #include <asm/processor.h>
kaf24@1452 31 #include <asm/mpspec.h>
kaf24@1452 32 #include <asm/apic.h>
kaf24@1452 33 #include <asm/desc.h>
Tim@13909 34 #include <asm/paging.h>
kaf24@3344 35 #include <asm/e820.h>
kfraser@15819 36 #include <xsm/acm/acm_hooks.h>
ian@12677 37 #include <xen/kexec.h>
kfraser@15336 38 #include <asm/edd.h>
kfraser@15815 39 #include <xsm/xsm.h>
keir@16274 40 #include <asm/tboot.h>
kaf24@3338 41
keir@19076 42 int __init bzimage_headroom(char *image_start, unsigned long image_length);
keir@19076 43
kfraser@15074 44 #if defined(CONFIG_X86_64)
kfraser@15597 45 #define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
kfraser@15074 46 #define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
kfraser@15074 47 #else
kfraser@15597 48 #define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
kfraser@15074 49 #define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
kfraser@15074 50 #endif
kfraser@15074 51
kaf24@5211 52 extern void generic_apic_probe(void);
kfraser@11971 53 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
kaf24@5157 54
kfraser@15330 55 extern u16 boot_edid_caps;
kfraser@15330 56 extern u8 boot_edid_info[128];
kfraser@15330 57 extern struct boot_video_info boot_vid_info;
kfraser@15330 58
kaf24@5146 59 /* opt_nosmp: If true, secondary processors are ignored. */
kaf24@5900 60 static int opt_nosmp = 0;
kaf24@5146 61 boolean_param("nosmp", opt_nosmp);
kaf24@5146 62
kaf24@5146 63 /* maxcpus: maximum number of CPUs to activate. */
kaf24@5146 64 static unsigned int max_cpus = NR_CPUS;
shand@11156 65 integer_param("maxcpus", max_cpus);
kaf24@5146 66
kaf24@3334 67 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
kaf24@3334 68 static int opt_watchdog = 0;
kaf24@3334 69 boolean_param("watchdog", opt_watchdog);
kaf24@3334 70
kaf24@4850 71 /* **** Linux config option: propagated to domain0. */
kaf24@4850 72 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
kaf24@4850 73 /* "acpi=force": Override the disable blacklist. */
kaf24@4850 74 /* "acpi=strict": Disables out-of-spec workarounds. */
kaf24@4850 75 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
kaf24@4850 76 /* "acpi=noirq": Disables ACPI interrupt routing. */
kaf24@4850 77 static void parse_acpi_param(char *s);
kaf24@4850 78 custom_param("acpi", parse_acpi_param);
kaf24@4850 79
kaf24@4850 80 /* **** Linux config option: propagated to domain0. */
kaf24@4850 81 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
kaf24@4850 82 extern int acpi_skip_timer_override;
kaf24@4850 83 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
kaf24@4850 84
kaf24@4850 85 /* **** Linux config option: propagated to domain0. */
kaf24@4850 86 /* noapic: Disable IOAPIC setup. */
kaf24@4850 87 extern int skip_ioapic_setup;
kaf24@4850 88 boolean_param("noapic", skip_ioapic_setup);
kaf24@4850 89
keir@17546 90 /* **** Linux config option: propagated to domain0. */
keir@17657 91 /* xen_cpuidle: xen control cstate. */
keir@19545 92 /*static*/ int xen_cpuidle = -1;
keir@17657 93 boolean_param("cpuidle", xen_cpuidle);
keir@17546 94
kaf24@3594 95 int early_boot = 1;
kaf24@3594 96
kaf24@5146 97 cpumask_t cpu_present_map;
kaf24@5146 98
kfraser@15074 99 unsigned long xen_phys_start;
keir@19266 100 unsigned long allocator_bitmap_end;
kfraser@15074 101
keir@19055 102 #ifdef CONFIG_X86_32
kaf24@5003 103 /* Limits of Xen heap, used to initialise the allocator. */
keir@19061 104 unsigned long xenheap_initial_phys_start, xenheap_phys_end;
keir@19055 105 #endif
kaf24@3338 106
kaf24@2298 107 extern void arch_init_memory(void);
kaf24@1589 108 extern void init_IRQ(void);
kaf24@5604 109 extern void early_time_init(void);
kaf24@5167 110 extern void early_cpu_init(void);
kfraser@15747 111 extern void vesa_init(void);
kfraser@15747 112 extern void vesa_mtrr_init(void);
keir@19646 113 extern void init_tmem(void);
kaf24@1589 114
keir@18523 115 DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
keir@18523 116 #ifdef CONFIG_COMPAT
keir@18523 117 DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
keir@18523 118 = boot_cpu_compat_gdt_table;
keir@18523 119 #endif
keir@18523 120
kaf24@8533 121 struct tss_struct init_tss[NR_CPUS];
kaf24@8533 122
kfraser@15490 123 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
kaf24@5011 124
keir@16144 125 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
kaf24@1452 126
kaf24@1670 127 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
kaf24@1452 128 EXPORT_SYMBOL(mmu_cr4_features);
kaf24@1452 129
kaf24@4818 130 int acpi_disabled;
kaf24@1452 131
kaf24@4850 132 int acpi_force;
kaf24@4850 133 char acpi_param[10] = "";
/*
 * Handler for the "acpi" command-line option (registered with custom_param).
 *
 * The raw value is first saved in acpi_param so that it can be passed on to
 * domain0, and is then interpreted for Xen itself. The recognised values are
 * "off", "force", "strict", "ht" and "noirq"; any other value is saved but
 * changes no Xen state in this function.
 */
kfraser@15074 134 static void __init parse_acpi_param(char *s)
kaf24@4850 135 {
kaf24@4850 136 /* Save the parameter so it can be propagated to domain0. */
kfraser@13689 137 safe_strcpy(acpi_param, s);
kaf24@4850 138
kaf24@4850 139 /* Interpret the parameter for use within Xen. */
kaf24@4850 140 if ( !strcmp(s, "off") )
kaf24@4850 141 {
kaf24@4850 142 disable_acpi();
kaf24@4850 143 }
kaf24@4850 144 else if ( !strcmp(s, "force") )
kaf24@4850 145 {
/* "force" also sets acpi_ht and clears acpi_disabled. */
kaf24@4850 146 acpi_force = 1;
kaf24@4850 147 acpi_ht = 1;
kaf24@4850 148 acpi_disabled = 0;
kaf24@4850 149 }
kaf24@4850 150 else if ( !strcmp(s, "strict") )
kaf24@4850 151 {
kaf24@4850 152 acpi_strict = 1;
kaf24@4850 153 }
kaf24@4850 154 else if ( !strcmp(s, "ht") )
kaf24@4850 155 {
/* "ht" disables ACPI unless an earlier "force" asked to keep it. */
kaf24@4850 156 if ( !acpi_force )
kaf24@4850 157 disable_acpi();
kaf24@4850 158 acpi_ht = 1;
kaf24@4850 159 }
kaf24@4850 160 else if ( !strcmp(s, "noirq") )
kaf24@4850 161 {
kaf24@4850 162 acpi_noirq_set();
kaf24@4850 163 }
kaf24@4850 164 }
kaf24@4850 165
/*
 * Call every function pointer stored in the table delimited by
 * __initcall_start and __initcall_end, in ascending address order.
 * Return values of the called functions are not examined.
 */
kaf24@1452 166 static void __init do_initcalls(void)
kaf24@1452 167 {
kaf24@1452 168 initcall_t *call;
kaf24@1452 169 for ( call = &__initcall_start; call < &__initcall_end; call++ )
kaf24@1452 170 (*call)();
kaf24@1452 171 }
kaf24@1452 172
/*
 * EARLY_FAIL(fmt, args...): print a message with printk() and then call
 * halt() in an endless loop. Control never leaves the macro.
 */
kfraser@15074 173 #define EARLY_FAIL(f, a...) do { \
kfraser@15074 174 printk( f , ## a ); \
kfraser@15871 175 for ( ; ; ) halt(); \
kfraser@15074 176 } while (0)
kaf24@8459 177
keir@19076 178 static unsigned long __initdata initial_images_base;
keir@19076 179 static unsigned long __initdata initial_images_start;
keir@19076 180 static unsigned long __initdata initial_images_end;
kaf24@9067 181
/*
 * Return the number of pages between initial_images_base and
 * initial_images_end. Both values are asserted to have no bits set
 * outside PAGE_MASK before the page counts are subtracted.
 */
kfraser@15074 182 unsigned long __init initial_images_nrpages(void)
kaf24@9067 183 {
keir@19076 184 ASSERT(!(initial_images_base & ~PAGE_MASK));
kfraser@15489 185 ASSERT(!(initial_images_end & ~PAGE_MASK));
kfraser@15489 186 return ((initial_images_end >> PAGE_SHIFT) -
keir@19076 187 (initial_images_base >> PAGE_SHIFT));
kaf24@9067 188 }
kaf24@9067 189
/*
 * Pass the range initial_images_base .. initial_images_end to
 * init_domheap_pages().
 * NOTE(review): presumably this releases the boot module images to the
 * domain heap once they are no longer needed -- confirm against callers.
 */
kfraser@15074 190 void __init discard_initial_images(void)
kaf24@9067 191 {
keir@19076 192 init_domheap_pages(initial_images_base, initial_images_end);
kaf24@9067 193 }
kaf24@9067 194
kaf24@9818 195 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
kaf24@9818 196
/*
 * Set up the per-cpu data areas of the secondary CPUs.
 *
 * The initialised per-cpu data (__per_cpu_start .. __per_cpu_data_end) is
 * copied into the area of every possible CPU, where the area of CPU i starts
 * at __per_cpu_start + (i << PERCPU_SHIFT). Copying stops at the first CPU
 * that is not possible; the function BUGs if any later CPU is possible,
 * i.e. cpu_possible_map must not contain holes. The areas of the remaining
 * (unused) CPUs are then given to init_xenheap_pages() unless MEMORY_GUARD
 * is defined, and are passed to memguard_guard_range().
 */
kfraser@11241 197 static void __init percpu_init_areas(void)
kaf24@9818 198 {
kaf24@9818 199 unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
kfraser@15074 200 unsigned int first_unused;
kaf24@9818 201
/* The initialised data must fit within a single per-cpu area. */
kaf24@9818 202 BUG_ON(data_size > PERCPU_SIZE);
kaf24@9818 203
kfraser@15074 204 /* Initialise per-cpu data area for all possible secondary CPUs. */
kfraser@15074 205 for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
kfraser@15074 206 memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@15074 207 __per_cpu_start,
kfraser@15074 208 data_size);
/* i is now the index of the first CPU whose area was not initialised. */
kaf24@9818 209 first_unused = i;
kaf24@9818 210
kfraser@14340 211 /* Check that there are no holes in cpu_possible_map. */
kaf24@9818 212 for ( ; i < NR_CPUS; i++ )
kfraser@14340 213 BUG_ON(cpu_possible(i));
kaf24@9818 214
kfraser@11241 215 #ifndef MEMORY_GUARD
kaf24@9818 216 init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
kaf24@9818 217 __pa(__per_cpu_end));
kfraser@11241 218 #endif
keir@15082 219 memguard_guard_range(&__per_cpu_start[first_unused << PERCPU_SHIFT],
keir@15082 220 (NR_CPUS - first_unused) << PERCPU_SHIFT);
keir@15082 221 #if defined(CONFIG_X86_64)
keir@15082 222 /* Also zap the mapping in the 1:1 area. */
keir@15082 223 memguard_guard_range(__va(__pa(__per_cpu_start)) +
keir@15082 224 (first_unused << PERCPU_SHIFT),
keir@15082 225 (NR_CPUS - first_unused) << PERCPU_SHIFT);
keir@15082 226 #endif
kaf24@9818 227 }
kaf24@9818 228
/*
 * Create the idle domain and make its first vcpu the current vcpu.
 *
 * After scheduler_init(), the domain is created with IDLE_DOMAIN_ID, its
 * vcpu array is pointed at the global idle_vcpu[] with max_vcpus set to
 * NR_CPUS, and vcpu 0 is allocated. idle_vcpu[0] then becomes current and
 * this CPU's curr_vcpu, and setup_idle_pagetable() is called. A failure of
 * domain_create() or alloc_vcpu() is fatal (BUG()).
 */
kfraser@11241 229 static void __init init_idle_domain(void)
kfraser@11240 230 {
kfraser@11240 231 struct domain *idle_domain;
kfraser@11240 232
kfraser@11240 233 /* Domain creation requires that scheduler structures are initialised. */
kfraser@11240 234 scheduler_init();
kfraser@11240 235
kfraser@14911 236 idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
keir@19788 237 if ( idle_domain == NULL )
keir@19788 238 BUG();
/* The vcpu array and limit are set before vcpu 0 is allocated. */
keir@19788 239 idle_domain->vcpu = idle_vcpu;
keir@19788 240 idle_domain->max_vcpus = NR_CPUS;
keir@19788 241 if ( alloc_vcpu(idle_domain, 0, 0) == NULL )
kfraser@11240 242 BUG();
kfraser@11240 243
keir@19788 244 set_current(idle_vcpu[0]);
keir@19788 245 this_cpu(curr_vcpu) = current;
kfraser@11240 246
kfraser@11240 247 setup_idle_pagetable();
kfraser@11240 248 }
kfraser@11240 249
/*
 * Assign the given CPU to a NUMA node.
 *
 * The CPU's APIC ID is looked up in x86_cpu_to_apicid[] and translated to a
 * node through apicid_to_node[]; NUMA_NO_NODE is replaced by node 0. The
 * result is recorded with numa_set_node() and, when acpi_numa is positive,
 * also logged.
 */
kfraser@15074 250 static void __init srat_detect_node(int cpu)
kfraser@11971 251 {
kfraser@11998 252 unsigned node;
keir@17551 253 u32 apicid = x86_cpu_to_apicid[cpu];
kfraser@11971 254
kfraser@11998 255 node = apicid_to_node[apicid];
/* Fall back to node 0 when no node is known for this APIC ID. */
kfraser@11998 256 if ( node == NUMA_NO_NODE )
kfraser@11998 257 node = 0;
kfraser@11998 258 numa_set_node(cpu, node);
kfraser@11971 259
kfraser@11998 260 if ( acpi_numa > 0 )
kfraser@11998 261 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
kfraser@11971 262 }
kfraser@11971 263
kfraser@15740 264 /*
kfraser@15740 265 * Ensure a given physical memory range is present in the bootstrap mappings.
kfraser@15740 266 * Use superpage mappings to ensure that pagetable memory needn't be allocated.
kfraser@15740 267 */
/*
 * start/end: bounds of the range to map. They are converted with
 * maddr_to_bootstrap_virt() and so are presumably machine addresses.
 * The range is widened to multiples of (1 << L2_PAGETABLE_SHIFT) and clipped
 * to start no lower than 16MB; nothing is mapped if that leaves it empty.
 * Panics if the widened range ends beyond BOOTSTRAP_DIRECTMAP_END.
 */
kfraser@15740 268 static void __init bootstrap_map(unsigned long start, unsigned long end)
kfraser@15740 269 {
kfraser@15740 270 unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
/* Round start down to the alignment, but never below 16MB. */
keir@16888 271 start = max_t(unsigned long, start & ~mask, 16UL << 20);
/* Round end up to the alignment. */
kfraser@15740 272 end = (end + mask) & ~mask;
keir@16888 273 if ( start >= end )
keir@16888 274 return;
kfraser@15740 275 if ( end > BOOTSTRAP_DIRECTMAP_END )
kfraser@15740 276 panic("Cannot access memory beyond end of "
kfraser@15740 277 "bootstrap direct-map area\n");
kfraser@15740 278 map_pages_to_xen(
kfraser@15740 279 (unsigned long)maddr_to_bootstrap_virt(start),
kfraser@15740 280 start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kfraser@15740 281 }
kfraser@15740 282
/*
 * Copy the bytes in [src_start, src_end) to dst.
 *
 * Both the source and the destination range are first passed to
 * bootstrap_map(), and the copy itself is done with memmove() on the
 * addresses returned by maddr_to_bootstrap_virt().
 */
kfraser@15074 283 static void __init move_memory(
kfraser@15074 284 unsigned long dst, unsigned long src_start, unsigned long src_end)
ian@12677 285 {
kfraser@15740 286 bootstrap_map(src_start, src_end);
kfraser@15740 287 bootstrap_map(dst, dst + src_end - src_start);
kfraser@15074 288 memmove(maddr_to_bootstrap_virt(dst),
kfraser@15074 289 maddr_to_bootstrap_virt(src_start),
ian@12677 290 src_end - src_start);
kfraser@15074 291 }
kfraser@15074 292
kfraser@15074 293 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
kfraser@15074 294 static struct e820map __initdata boot_e820;
kfraser@15074 295
/*
 * Layout of the boot-time video information record (the type of the
 * external object boot_vid_info). The trailing comment on each field gives
 * its byte offset within the structure.
 * NOTE(review): presumably filled in by early boot code outside this file
 * -- confirm where boot_vid_info is written.
 */
keir@15298 296 struct boot_video_info {
keir@15298 297 u8 orig_x; /* 0x00 */
keir@15298 298 u8 orig_y; /* 0x01 */
keir@15298 299 u8 orig_video_mode; /* 0x02 */
keir@15298 300 u8 orig_video_cols; /* 0x03 */
keir@15298 301 u8 orig_video_lines; /* 0x04 */
keir@15298 302 u8 orig_video_isVGA; /* 0x05 */
keir@15298 303 u16 orig_video_points; /* 0x06 */
keir@15298 304
keir@15298 305 /* VESA graphic mode -- linear frame buffer */
keir@15298 306 u32 capabilities; /* 0x08 */
keir@15298 307 u16 lfb_linelength; /* 0x0c */
keir@15298 308 u16 lfb_width; /* 0x0e */
keir@15298 309 u16 lfb_height; /* 0x10 */
keir@15298 310 u16 lfb_depth; /* 0x12 */
keir@15298 311 u32 lfb_base; /* 0x14 */
keir@15298 312 u32 lfb_size; /* 0x18 */
keir@15298 313 u8 red_size; /* 0x1c */
keir@15298 314 u8 red_pos; /* 0x1d */
keir@15298 315 u8 green_size; /* 0x1e */
keir@15298 316 u8 green_pos; /* 0x1f */
keir@15298 317 u8 blue_size; /* 0x20 */
keir@15298 318 u8 blue_pos; /* 0x21 */
keir@15298 319 u8 rsvd_size; /* 0x22 */
keir@15298 320 u8 rsvd_pos; /* 0x23 */
keir@15298 321 u16 vesapm_seg; /* 0x24 */
keir@15298 322 u16 vesapm_off; /* 0x26 */
keir@16124 323 u16 vesa_attrib; /* 0x28 */
keir@15298 324 };
keir@15298 325
/*
 * Translate the boot-time video record (boot_vid_info) into
 * vga_console_info.
 *
 * Two cases are handled: orig_video_isVGA == 1 with video mode 3 is
 * recorded as XEN_VGATYPE_TEXT_MODE_3, and orig_video_isVGA == 0x23 is
 * recorded as XEN_VGATYPE_VESA_LFB. In any other case vga_console_info is
 * left unmodified.
 */
keir@15298 326 static void __init parse_video_info(void)
keir@15298 327 {
keir@15298 328 struct boot_video_info *bvi = &bootsym(boot_vid_info);
keir@15298 329
keir@15298 330 if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) )
keir@15298 331 {
/* Text mode 3: copy font height, cursor position and dimensions. */
keir@15298 332 vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
keir@15298 333 vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points;
keir@15298 334 vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x;
keir@15298 335 vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y;
keir@15298 336 vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines;
keir@15298 337 vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols;
keir@15298 338 }
keir@15298 339 else if ( bvi->orig_video_isVGA == 0x23 )
keir@15298 340 {
/* VESA linear frame buffer: copy geometry, colour layout and caps. */
keir@15298 341 vga_console_info.video_type = XEN_VGATYPE_VESA_LFB;
keir@15298 342 vga_console_info.u.vesa_lfb.width = bvi->lfb_width;
keir@15298 343 vga_console_info.u.vesa_lfb.height = bvi->lfb_height;
keir@15298 344 vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength;
keir@15298 345 vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth;
keir@15298 346 vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base;
keir@15298 347 vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size;
keir@15298 348 vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos;
keir@15298 349 vga_console_info.u.vesa_lfb.red_size = bvi->red_size;
keir@15298 350 vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos;
keir@15298 351 vga_console_info.u.vesa_lfb.green_size = bvi->green_size;
keir@15298 352 vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos;
keir@15298 353 vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
keir@15298 354 vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
keir@15298 355 vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
keir@16124 356 vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities;
keir@16124 357 vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib;
keir@15298 358 }
keir@15298 359 }
keir@15298 360
/*
 * Reserve the kexec crash area (kexec_crash_area.start/.size) in the given
 * e820 map.
 *
 * The size is rounded up to a multiple of PAGE_SIZE. The function does
 * nothing if the start or size is zero, or if a reservation has already
 * been attempted: is_reserved is set before reserve_e820_ram() is called,
 * so a failed attempt is not retried. On failure kexec_crash_area is zeroed
 * and a "DISABLED" message is printed; on success the reserved range is
 * printed.
 */
keir@16563 361 void __init kexec_reserve_area(struct e820map *e820)
keir@16563 362 {
keir@16563 363 unsigned long kdump_start = kexec_crash_area.start;
keir@16563 364 unsigned long kdump_size = kexec_crash_area.size;
keir@16563 365 static int is_reserved = 0;
keir@16563 366
/* Round the size up to a whole number of pages. */
keir@16563 367 kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;
keir@16563 368
keir@16563 369 if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved )
keir@16563 370 return;
keir@16563 371
keir@16563 372 is_reserved = 1;
keir@16563 373
keir@17674 374 if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) )
keir@16563 375 {
keir@16563 376 printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)"
keir@16563 377 "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
keir@16563 378 kexec_crash_area.start = kexec_crash_area.size = 0;
keir@16563 379 }
keir@16563 380 else
keir@16563 381 {
keir@16563 382 printk("Kdump: %luMB (%lukB) at 0x%lx\n",
keir@16563 383 kdump_size >> 20, kdump_size >> 10, kdump_start);
keir@16563 384 }
keir@16563 385 }
keir@16563 386
/*
 * Dispose of the init sections (__init_begin .. __init_end) and enter the
 * idle loop.
 *
 * The range is overwritten with 0xcc bytes, given to init_xenheap_pages()
 * unless MEMORY_GUARD is defined, and passed to memguard_guard_range() (on
 * x86_64 the __va(__pa()) alias of the range is guarded too). The size of
 * the range is then printed and startup_cpu_idle_loop() is called.
 * NOTE(review): startup_cpu_idle_loop() presumably does not return --
 * confirm.
 */
keir@15082 387 void init_done(void)
keir@15082 388 {
keir@15082 389 extern char __init_begin[], __init_end[];
keir@15082 390
keir@15082 391 /* Free (or page-protect) the init areas. */
keir@18988 392 memset(__init_begin, 0xcc, __init_end - __init_begin); /* int3 poison */
keir@15082 393 #ifndef MEMORY_GUARD
keir@15082 394 init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
keir@15082 395 #endif
keir@15082 396 memguard_guard_range(__init_begin, __init_end - __init_begin);
keir@15082 397 #if defined(CONFIG_X86_64)
keir@15082 398 /* Also zap the mapping in the 1:1 area. */
keir@15082 399 memguard_guard_range(__va(__pa(__init_begin)), __init_end - __init_begin);
keir@15082 400 #endif
keir@15082 401 printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
keir@15082 402
keir@15082 403 startup_cpu_idle_loop();
keir@15082 404 }
keir@15082 405
/*
 * Skip the first space-delimited word of a command line.
 *
 * A NULL argument is treated as the empty string. Leading spaces, then all
 * characters up to the next space or the terminating NUL, then any
 * following spaces are skipped. The returned pointer refers to the start of
 * the second word, or to the terminating NUL if there is none. The string
 * itself is not modified.
 */
kfraser@15796 406 static char * __init cmdline_cook(char *p)
kfraser@15796 407 {
/* GNU "?:" shorthand: keep p when non-NULL, otherwise use "". */
kfraser@15796 408 p = p ? : "";
kfraser@15796 409 while ( *p == ' ' )
kfraser@15796 410 p++;
kfraser@15796 411 while ( (*p != ' ') && (*p != '\0') )
kfraser@15796 412 p++;
kfraser@15796 413 while ( *p == ' ' )
kfraser@15796 414 p++;
kfraser@15796 415 return p;
kfraser@15796 416 }
kfraser@15796 417
kfraser@15379 418 void __init __start_xen(unsigned long mbi_p)
kaf24@1452 419 {
kfraser@15293 420 char *memmap_type = NULL;
kfraser@15796 421 char *cmdline, *kextra;
kaf24@8457 422 unsigned long _initrd_start = 0, _initrd_len = 0;
kaf24@8457 423 unsigned int initrdidx = 1;
kfraser@15379 424 multiboot_info_t *mbi = __va(mbi_p);
kaf24@8457 425 module_t *mod = (module_t *)__va(mbi->mods_addr);
keir@19135 426 unsigned long nr_pages, modules_length, modules_headroom;
keir@19544 427 int i, j, e820_warn = 0, bytes = 0;
kaf24@5776 428 struct ns16550_defaults ns16550 = {
kaf24@5776 429 .data_bits = 8,
kaf24@5776 430 .parity = 'n',
kaf24@5776 431 .stop_bits = 1
kaf24@5776 432 };
kaf24@3338 433
kfraser@12853 434 extern void early_page_fault(void);
kfraser@12853 435 set_intr_gate(TRAP_page_fault, &early_page_fault);
kfraser@12853 436
kaf24@3338 437 /* Parse the command-line options. */
kfraser@15796 438 cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ?
kfraser@15796 439 __va(mbi->cmdline) : NULL);
kfraser@15426 440 if ( (kextra = strstr(cmdline, " -- ")) != NULL )
kfraser@15426 441 {
kfraser@15426 442 /*
kfraser@15426 443 * Options after ' -- ' separator belong to dom0.
kfraser@15426 444 * 1. Orphan dom0's options from Xen's command line.
kfraser@15426 445 * 2. Skip all but final leading space from dom0's options.
kfraser@15426 446 */
kfraser@15426 447 *kextra = '\0';
kfraser@15426 448 kextra += 3;
kfraser@15426 449 while ( kextra[1] == ' ' ) kextra++;
kfraser@15426 450 }
kaf24@9823 451 cmdline_parse(cmdline);
kaf24@3338 452
keir@15298 453 parse_video_info();
keir@15298 454
kaf24@8534 455 set_current((struct vcpu *)0xfffff000); /* debug sanity */
keir@18790 456 idle_vcpu[0] = current;
kaf24@8534 457 set_processor_id(0); /* needed early, for smp_processor_id() */
keir@16378 458 if ( cpu_has_efer )
keir@16378 459 rdmsrl(MSR_EFER, this_cpu(efer));
keir@16267 460 asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
kaf24@3338 461
kaf24@5146 462 smp_prepare_boot_cpu();
kaf24@5146 463
kaf24@3338 464 /* We initialise the serial devices very early so we can get debugging. */
kaf24@5776 465 ns16550.io_base = 0x3f8;
kaf24@5776 466 ns16550.irq = 4;
kaf24@5776 467 ns16550_init(0, &ns16550);
kaf24@5776 468 ns16550.io_base = 0x2f8;
kaf24@5776 469 ns16550.irq = 3;
kaf24@5776 470 ns16550_init(1, &ns16550);
keir@19543 471 console_init_preirq();
kaf24@3338 472
kfraser@11947 473 printk("Command line: %s\n", cmdline);
kaf24@9823 474
kfraser@15330 475 printk("Video information:\n");
kfraser@15330 476
kfraser@15330 477 /* Print VGA display mode information. */
keir@15298 478 switch ( vga_console_info.video_type )
keir@15298 479 {
keir@15298 480 case XEN_VGATYPE_TEXT_MODE_3:
kfraser@15330 481 printk(" VGA is text mode %dx%d, font 8x%d\n",
keir@15298 482 vga_console_info.u.text_mode_3.columns,
keir@15298 483 vga_console_info.u.text_mode_3.rows,
keir@15298 484 vga_console_info.u.text_mode_3.font_height);
keir@15298 485 break;
keir@15298 486 case XEN_VGATYPE_VESA_LFB:
kfraser@15330 487 printk(" VGA is graphics mode %dx%d, %d bpp\n",
keir@15298 488 vga_console_info.u.vesa_lfb.width,
keir@15298 489 vga_console_info.u.vesa_lfb.height,
keir@15298 490 vga_console_info.u.vesa_lfb.bits_per_pixel);
keir@15298 491 break;
kfraser@15330 492 default:
kfraser@15330 493 printk(" No VGA detected\n");
kfraser@15330 494 break;
kfraser@15330 495 }
kfraser@15330 496
kfraser@15330 497 /* Print VBE/DDC EDID information. */
kfraser@15330 498 if ( bootsym(boot_edid_caps) != 0x1313 )
kfraser@15330 499 {
kfraser@15330 500 u16 caps = bootsym(boot_edid_caps);
kfraser@15330 501 printk(" VBE/DDC methods:%s%s%s; ",
kfraser@15330 502 (caps & 1) ? " V1" : "",
kfraser@15330 503 (caps & 2) ? " V2" : "",
kfraser@15330 504 !(caps & 3) ? " none" : "");
kfraser@15330 505 printk("EDID transfer time: %d seconds\n", caps >> 8);
kfraser@15330 506 if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
kfraser@15330 507 {
kfraser@15330 508 printk(" EDID info not retrieved because ");
kfraser@15330 509 if ( !(caps & 3) )
kfraser@15330 510 printk("no DDC retrieval method detected\n");
kfraser@15330 511 else if ( (caps >> 8) > 5 )
kfraser@15330 512 printk("takes longer than 5 seconds\n");
kfraser@15330 513 else
kfraser@15330 514 printk("of reasons unknown\n");
kfraser@15330 515 }
keir@15298 516 }
keir@15298 517
kfraser@15336 518 printk("Disc information:\n");
kfraser@15336 519 printk(" Found %d MBR signatures\n",
kfraser@15430 520 bootsym(boot_mbr_signature_nr));
kfraser@15336 521 printk(" Found %d EDD information structures\n",
kfraser@15336 522 bootsym(boot_edd_info_nr));
kfraser@15336 523
kaf24@3344 524 /* Check that we have at least one Multiboot module. */
kaf24@3344 525 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
kfraser@15074 526 EARLY_FAIL("dom0 kernel not specified. "
kfraser@15074 527 "Check bootloader configuration.\n");
kaf24@5011 528
kaf24@5011 529 if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
kfraser@15074 530 EARLY_FAIL("Misaligned CPU0 stack.\n");
kaf24@3338 531
kfraser@15293 532 if ( e820_raw_nr != 0 )
kfraser@15292 533 {
kfraser@15293 534 memmap_type = "Xen-e820";
kfraser@15292 535 }
kfraser@15293 536 else if ( bootsym(lowmem_kb) )
kfraser@15292 537 {
kfraser@15293 538 memmap_type = "Xen-e801";
kfraser@15292 539 e820_raw[0].addr = 0;
kfraser@15293 540 e820_raw[0].size = bootsym(lowmem_kb) << 10;
kfraser@15292 541 e820_raw[0].type = E820_RAM;
kfraser@15292 542 e820_raw[1].addr = 0x100000;
kfraser@15293 543 e820_raw[1].size = bootsym(highmem_kb) << 10;
kfraser@15292 544 e820_raw[1].type = E820_RAM;
kfraser@15292 545 e820_raw_nr = 2;
kfraser@15292 546 }
kfraser@15292 547 else if ( mbi->flags & MBI_MEMMAP )
kaf24@3344 548 {
kfraser@15293 549 memmap_type = "Multiboot-e820";
keir@15988 550 while ( (bytes < mbi->mmap_length) && (e820_raw_nr < E820MAX) )
kaf24@3344 551 {
kaf24@3344 552 memory_map_t *map = __va(mbi->mmap_addr + bytes);
kaf24@8402 553
kaf24@8402 554 /*
kaf24@8403 555 * This is a gross workaround for a BIOS bug. Some bootloaders do
kaf24@8402 556 * not write e820 map entries into pre-zeroed memory. This is
kaf24@8402 557 * okay if the BIOS fills in all fields of the map entry, but
kaf24@8402 558 * some broken BIOSes do not bother to write the high word of
kaf24@8402 559 * the length field if the length is smaller than 4GB. We
kaf24@8402 560 * detect and fix this by flagging sections below 4GB that
kaf24@8403 561 * appear to be larger than 4GB in size.
kaf24@8402 562 */
kaf24@8403 563 if ( (map->base_addr_high == 0) && (map->length_high != 0) )
kaf24@8402 564 {
kfraser@15292 565 if ( !e820_warn )
kfraser@15292 566 {
kfraser@15292 567 printk("WARNING: Buggy e820 map detected and fixed "
kfraser@15292 568 "(truncated length fields).\n");
kfraser@15292 569 e820_warn = 1;
kfraser@15292 570 }
kaf24@8402 571 map->length_high = 0;
kaf24@8402 572 }
kaf24@8402 573
kaf24@3344 574 e820_raw[e820_raw_nr].addr =
kaf24@3344 575 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
kaf24@3344 576 e820_raw[e820_raw_nr].size =
kaf24@3344 577 ((u64)map->length_high << 32) | (u64)map->length_low;
kfraser@15799 578 e820_raw[e820_raw_nr].type = map->type;
kaf24@3344 579 e820_raw_nr++;
kaf24@8402 580
kaf24@3344 581 bytes += map->size + 4;
kaf24@3344 582 }
kaf24@3344 583 }
kaf24@3344 584 else if ( mbi->flags & MBI_MEMLIMITS )
kaf24@3344 585 {
kfraser@15293 586 memmap_type = "Multiboot-e801";
kaf24@3344 587 e820_raw[0].addr = 0;
kaf24@3344 588 e820_raw[0].size = mbi->mem_lower << 10;
kaf24@3344 589 e820_raw[0].type = E820_RAM;
kaf24@3354 590 e820_raw[1].addr = 0x100000;
kaf24@3354 591 e820_raw[1].size = mbi->mem_upper << 10;
kaf24@3354 592 e820_raw[1].type = E820_RAM;
kaf24@3344 593 e820_raw_nr = 2;
kaf24@3344 594 }
kaf24@3344 595 else
kaf24@3344 596 {
kfraser@15074 597 EARLY_FAIL("Bootloader provided no memory information.\n");
kaf24@3344 598 }
kaf24@3344 599
kaf24@13427 600 /* Sanitise the raw E820 map to produce a final clean version. */
kfraser@15293 601 max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);
kaf24@3338 602
keir@16563 603 /* Create a temporary copy of the E820 map. */
kfraser@15074 604 memcpy(&boot_e820, &e820, sizeof(e820));
keir@16563 605
keir@16563 606 /* Early kexec reservation (explicit static start address). */
keir@16563 607 kexec_reserve_area(&boot_e820);
kaf24@6111 608
kfraser@15074 609 /*
keir@15077 610 * Iterate backwards over all superpage-aligned RAM regions.
kfraser@15074 611 *
kfraser@15074 612 * We require superpage alignment because the boot allocator is not yet
kfraser@15074 613 * initialised. Hence we can only map superpages in the address range
kfraser@15074 614 * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
kfraser@15074 615 * dynamic allocation of pagetables.
kfraser@15074 616 *
kfraser@15074 617 * As well as mapping superpages in that range, in preparation for
kfraser@15074 618 * initialising the boot allocator, we also look for a region to which
kfraser@15074 619 * we can relocate the dom0 kernel and other multiboot modules. Also, on
kfraser@15074 620 * x86/64, we relocate Xen to higher memory.
kfraser@15074 621 */
keir@19544 622 modules_length = 0;
keir@19544 623 for ( i = 0; i < mbi->mods_count; i++ )
keir@19544 624 modules_length += mod[i].mod_end - mod[i].mod_start;
keir@19076 625
keir@19135 626 /* ensure mod[0] is mapped before parsing */
keir@19135 627 bootstrap_map(mod[0].mod_start, mod[0].mod_end);
keir@19135 628 modules_headroom = bzimage_headroom(
keir@19135 629 (char *)(unsigned long)mod[0].mod_start,
keir@19135 630 (unsigned long)(mod[0].mod_end - mod[0].mod_start));
keir@19135 631
keir@15077 632 for ( i = boot_e820.nr_map-1; i >= 0; i-- )
kfraser@15074 633 {
kfraser@15074 634 uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
kaf24@6134 635
keir@16563 636 /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
kfraser@15074 637 s = (boot_e820.map[i].addr + mask) & ~mask;
kfraser@15074 638 e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
keir@16563 639 s = max_t(uint64_t, s, 16 << 20);
kfraser@15074 640 e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
kfraser@15074 641 if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
kaf24@3354 642 continue;
kaf24@6111 643
kfraser@15074 644 /* Map the chunk. No memory will need to be allocated to do this. */
kfraser@15074 645 map_pages_to_xen(
kfraser@15074 646 (unsigned long)maddr_to_bootstrap_virt(s),
kfraser@15074 647 s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kaf24@6111 648
kfraser@14084 649 #if defined(CONFIG_X86_64)
keir@19190 650 /* Relocate Xen image, allocation bitmap, and one page of padding. */
keir@19190 651 #define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
kfraser@15074 652 /* Is the region suitable for relocating Xen? */
keir@19055 653 if ( !xen_phys_start && ((e-s) >= reloc_size) )
kaf24@5003 654 {
kfraser@15074 655 extern l2_pgentry_t l2_xenmap[];
kfraser@15074 656 l4_pgentry_t *pl4e;
kfraser@15074 657 l3_pgentry_t *pl3e;
kfraser@15074 658 l2_pgentry_t *pl2e;
keir@16888 659 int i, j, k;
kfraser@15074 660
kfraser@15074 661 /* Select relocation address. */
keir@19055 662 e -= reloc_size;
kfraser@15074 663 xen_phys_start = e;
kfraser@15292 664 bootsym(trampoline_xen_phys_start) = e;
kfraser@15074 665
kfraser@15074 666 /*
kfraser@15074 667 * Perform relocation to new physical address.
kfraser@15074 668 * Before doing so we must sync static/global data with main memory
kfraser@15074 669 * with a barrier(). After this we must *not* modify static/global
kfraser@15074 670 * data until after we have switched to the relocated pagetables!
kfraser@15074 671 */
kfraser@15074 672 barrier();
kfraser@15074 673 move_memory(e, 0, __pa(&_end) - xen_phys_start);
kfraser@15074 674
kfraser@15379 675 /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
kfraser@15379 676 memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
kfraser@15379 677
kfraser@15074 678 /* Walk initial pagetables, relocating page directory entries. */
kfraser@15074 679 pl4e = __va(__pa(idle_pg_table));
kfraser@15074 680 for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
kfraser@15074 681 {
kfraser@15074 682 if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
kfraser@15074 683 continue;
kfraser@15074 684 *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
kfraser@15074 685 xen_phys_start);
kfraser@15074 686 pl3e = l4e_to_l3e(*pl4e);
kfraser@15074 687 for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
kfraser@15074 688 {
keir@16921 689 /* Not present, 1GB mapping, or already relocated? */
kfraser@15074 690 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
keir@16921 691 (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
kfraser@15074 692 (l3e_get_pfn(*pl3e) > 0x1000) )
kfraser@15074 693 continue;
kfraser@15074 694 *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
kfraser@15074 695 xen_phys_start);
keir@16888 696 pl2e = l3e_to_l2e(*pl3e);
keir@16888 697 for ( k = 0; k < L2_PAGETABLE_ENTRIES; k++, pl2e++ )
keir@16888 698 {
keir@16888 699 /* Not present, PSE, or already relocated? */
keir@16888 700 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
keir@16888 701 (l2e_get_flags(*pl2e) & _PAGE_PSE) ||
keir@16888 702 (l2e_get_pfn(*pl2e) > 0x1000) )
keir@16888 703 continue;
keir@16888 704 *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
keir@16888 705 xen_phys_start);
keir@16888 706 }
kfraser@15074 707 }
kfraser@15074 708 }
kfraser@15074 709
kfraser@15074 710 /* The only data mappings to be relocated are in the Xen area. */
kfraser@15074 711 pl2e = __va(__pa(l2_xenmap));
keir@16888 712 *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
keir@16888 713 PAGE_HYPERVISOR | _PAGE_PSE);
keir@16888 714 for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
kfraser@15074 715 {
kfraser@15074 716 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
kfraser@15074 717 continue;
kfraser@15074 718 *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
kfraser@15074 719 xen_phys_start);
kfraser@15074 720 }
kfraser@15074 721
kfraser@15074 722 /* Re-sync the stack and then switch to relocated pagetables. */
kfraser@15074 723 asm volatile (
kfraser@15074 724 "rep movsb ; " /* re-sync the stack */
kfraser@15074 725 "movq %%cr4,%%rsi ; "
kfraser@15074 726 "andb $0x7f,%%sil ; "
kfraser@15074 727 "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
kfraser@15074 728 "movq %0,%%cr3 ; " /* CR3 == new pagetables */
kfraser@15074 729 "orb $0x80,%%sil ; "
kfraser@15074 730 "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
kfraser@15074 731 : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
kfraser@15074 732 "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
kaf24@5003 733 }
kaf24@5003 734 #endif
keir@15077 735
keir@15077 736 /* Is the region suitable for relocating the multiboot modules? */
keir@19076 737 if ( !initial_images_start && (s < e) &&
keir@19076 738 ((e-s) >= (modules_length+modules_headroom)) )
keir@15077 739 {
kfraser@15489 740 initial_images_end = e;
kfraser@15489 741 e = (e - modules_length) & PAGE_MASK;
keir@15077 742 initial_images_start = e;
keir@19076 743 e -= modules_headroom;
keir@19076 744 initial_images_base = e;
keir@19544 745 e += modules_length + modules_headroom;
keir@19544 746 for ( j = mbi->mods_count-1; j >= 0; j-- )
keir@19544 747 {
keir@19544 748 e -= mod[j].mod_end - mod[j].mod_start;
keir@19544 749 move_memory(e, mod[j].mod_start, mod[j].mod_end);
keir@19544 750 mod[j].mod_end += e - mod[j].mod_start;
keir@19544 751 mod[j].mod_start = e;
keir@19544 752 }
keir@15077 753 }
kfraser@15489 754
kfraser@15489 755 if ( !kexec_crash_area.start && (s < e) &&
kfraser@15489 756 ((e-s) >= kexec_crash_area.size) )
kfraser@15489 757 {
kfraser@15489 758 e = (e - kexec_crash_area.size) & PAGE_MASK;
kfraser@15489 759 kexec_crash_area.start = e;
kfraser@15489 760 }
kaf24@3354 761 }
kaf24@3354 762
kfraser@15074 763 if ( !initial_images_start )
kfraser@15074 764 EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
keir@19076 765 reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
kfraser@15074 766
keir@19055 767 /* Initialise boot heap. */
keir@19055 768 allocator_bitmap_end = init_boot_allocator(__pa(&_end));
keir@19055 769 #if defined(CONFIG_X86_32)
keir@19061 770 xenheap_initial_phys_start = allocator_bitmap_end;
keir@19061 771 xenheap_phys_end = DIRECTMAP_MBYTES << 20;
keir@19055 772 #else
kfraser@15074 773 if ( !xen_phys_start )
kfraser@15074 774 EARLY_FAIL("Not enough memory to relocate Xen.\n");
keir@19055 775 reserve_e820_ram(&boot_e820, __pa(&_start), allocator_bitmap_end);
kfraser@15074 776 #endif
kfraser@15074 777
keir@16563 778 /* Late kexec reservation (dynamic start address). */
keir@16563 779 kexec_reserve_area(&boot_e820);
kfraser@15489 780
kfraser@15074 781 /*
keir@16624 782 * With the boot allocator now initialised, we can walk every RAM region
keir@16624 783 * and map it in its entirety (on x86/64, at least) and notify it to the
kfraser@15074 784 * boot allocator.
kfraser@15074 785 */
kfraser@15074 786 for ( i = 0; i < boot_e820.nr_map; i++ )
kfraser@15074 787 {
keir@16563 788 uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
kfraser@15074 789
kfraser@15074 790 /* Only page alignment required now. */
kfraser@15074 791 s = (boot_e820.map[i].addr + mask) & ~mask;
kfraser@15074 792 e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
keir@16563 793 #if defined(CONFIG_X86_32)
keir@16563 794 s = max_t(uint64_t, s, xenheap_phys_end);
keir@16563 795 #else
keir@16563 796 s = max_t(uint64_t, s, 1<<20);
keir@16563 797 #endif
kfraser@15074 798 if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
kfraser@15074 799 continue;
kfraser@15074 800
keir@16563 801 /* Need to create mappings above 16MB. */
keir@16563 802 map_s = max_t(uint64_t, s, 16<<20);
kfraser@15074 803 map_e = e;
keir@16563 804 #if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
kfraser@15074 805 map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
kfraser@15074 806 #endif
keir@16563 807
keir@16563 808 /* Pass mapped memory to allocator /before/ creating new mappings. */
keir@16624 809 init_boot_pages(s, min_t(uint64_t, map_s, e));
keir@16563 810
keir@16563 811 /* Create new mappings /before/ passing memory to the allocator. */
keir@16563 812 if ( map_s < map_e )
kfraser@15074 813 map_pages_to_xen(
keir@16563 814 (unsigned long)maddr_to_bootstrap_virt(map_s),
keir@16563 815 map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
keir@16563 816 PAGE_HYPERVISOR);
kfraser@15074 817
keir@16563 818 /* Pass remainder of this memory chunk to the allocator. */
keir@16624 819 init_boot_pages(map_s, e);
kfraser@15074 820 }
kfraser@15074 821
kaf24@5003 822 memguard_init();
kaf24@4950 823
kfraser@15074 824 nr_pages = 0;
kfraser@15074 825 for ( i = 0; i < e820.nr_map; i++ )
kfraser@15074 826 if ( e820.map[i].type == E820_RAM )
kfraser@15074 827 nr_pages += e820.map[i].size >> PAGE_SHIFT;
ian@12681 828 printk("System RAM: %luMB (%lukB)\n",
kaf24@3354 829 nr_pages >> (20 - PAGE_SHIFT),
kaf24@3354 830 nr_pages << (PAGE_SHIFT - 10));
kaf24@7220 831 total_pages = nr_pages;
kaf24@3354 832
kfraser@11296 833 /* Sanity check for unwanted bloat of certain hypercall structures. */
kfraser@11296 834 BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
kfraser@11296 835 sizeof(((struct xen_platform_op *)0)->u.pad));
kfraser@11296 836 BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
kfraser@11296 837 sizeof(((struct xen_domctl *)0)->u.pad));
kfraser@11296 838 BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
kfraser@11296 839 sizeof(((struct xen_sysctl *)0)->u.pad));
kaf24@7388 840
kaf24@9878 841 BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
kaf24@9878 842 BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
ack@13292 843 BUILD_BUG_ON(sizeof(struct vcpu_info) != 64);
kaf24@7744 844
ack@13291 845 #ifdef CONFIG_COMPAT
ack@13291 846 BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) !=
ack@13291 847 sizeof(((struct compat_platform_op *)0)->u.pad));
ack@13291 848 BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE);
ack@13292 849 BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64);
ack@13291 850 #endif
ack@13291 851
kfraser@10492 852 /* Check definitions in public headers match internal defs. */
kaf24@9878 853 BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
kaf24@8521 854 #ifdef HYPERVISOR_VIRT_END
kaf24@9878 855 BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
kaf24@8521 856 #endif
kfraser@10492 857 BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
kfraser@10492 858 BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);
kaf24@8521 859
kaf24@3354 860 init_frametable();
kaf24@3338 861
kfraser@11971 862 acpi_boot_table_init();
kfraser@11971 863
kfraser@11971 864 acpi_numa_init();
kfraser@11971 865
kfraser@11971 866 numa_initmem_init(0, max_page);
kfraser@11971 867
keir@19055 868 #if defined(CONFIG_X86_32)
keir@19055 869 /* Initialise the Xen heap. */
keir@19061 870 init_xenheap_pages(xenheap_initial_phys_start, xenheap_phys_end);
keir@19061 871 nr_pages = (xenheap_phys_end - xenheap_initial_phys_start) >> PAGE_SHIFT;
kaf24@6111 872 printk("Xen heap: %luMB (%lukB)\n",
kaf24@6111 873 nr_pages >> (20 - PAGE_SHIFT),
kaf24@6111 874 nr_pages << (PAGE_SHIFT - 10));
keir@19055 875 #endif
kaf24@3338 876
keir@14680 877 end_boot_allocator();
keir@19055 878 early_boot = 0;
keir@14680 879
keir@19055 880 #if defined(CONFIG_X86_64)
keir@19055 881 vesa_init();
keir@19055 882 #endif
kaf24@3338 883
keir@17444 884 softirq_init();
keir@17444 885
kaf24@8459 886 early_cpu_init();
kaf24@8459 887
kaf24@8459 888 paging_init();
kaf24@8459 889
keir@16274 890 tboot_probe();
keir@16274 891
kaf24@8459 892 /* Unmap the first page of CPU0's stack. */
kaf24@8459 893 memguard_guard_stack(cpu0_stack);
kaf24@8459 894
kaf24@8459 895 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
kaf24@8459 896
kaf24@8459 897 if ( opt_watchdog )
kaf24@8459 898 nmi_watchdog = NMI_LOCAL_APIC;
kaf24@8459 899
kaf24@8459 900 sort_exception_tables();
kaf24@8459 901
kaf24@8459 902 find_smp_config();
kaf24@8459 903
kaf24@8459 904 dmi_scan_machine();
kaf24@8459 905
kaf24@8459 906 generic_apic_probe();
kaf24@8459 907
keir@17552 908 if ( x2apic_is_available() )
keir@17552 909 enable_x2apic();
keir@17552 910
kaf24@8459 911 acpi_boot_init();
kaf24@8459 912
kfraser@11971 913 init_cpu_to_node();
kfraser@11971 914
kfraser@11241 915 if ( smp_found_config )
kaf24@8459 916 get_smp_config();
kaf24@8459 917
keir@15083 918 #ifdef CONFIG_X86_64
keir@15083 919 /* Low mappings were only needed for some BIOS table parsing. */
keir@15083 920 zap_low_mappings();
keir@15083 921 #endif
keir@15083 922
kaf24@8459 923 init_apic_mappings();
kaf24@8459 924
kaf24@8459 925 init_IRQ();
kaf24@8459 926
kfraser@11241 927 percpu_init_areas();
kfraser@11241 928
kfraser@15815 929 xsm_init(&initrdidx, mbi, initial_images_start);
kfraser@15815 930
kfraser@11240 931 init_idle_domain();
kfraser@11240 932
kaf24@8459 933 trap_init();
kaf24@8459 934
kaf24@13662 935 rcu_init();
kaf24@13662 936
kaf24@8586 937 timer_init();
kaf24@8459 938
kaf24@8459 939 early_time_init();
kaf24@8459 940
kaf24@8459 941 arch_init_memory();
kaf24@8459 942
kaf24@8459 943 identify_cpu(&boot_cpu_data);
kaf24@8459 944 if ( cpu_has_fxsr )
kaf24@8459 945 set_in_cr4(X86_CR4_OSFXSR);
kaf24@8459 946 if ( cpu_has_xmm )
kaf24@8459 947 set_in_cr4(X86_CR4_OSXMMEXCPT);
keir@18920 948
keir@18920 949 local_irq_enable();
keir@18920 950
kfraser@15747 951 #ifdef CONFIG_X86_64
kfraser@15747 952 vesa_mtrr_init();
kfraser@15747 953 #endif
kaf24@8459 954
kaf24@8459 955 if ( opt_nosmp )
kaf24@8459 956 max_cpus = 0;
kaf24@8459 957
kaf24@8459 958 smp_prepare_cpus(max_cpus);
kaf24@8459 959
keir@18920 960 spin_debug_enable();
keir@18920 961
kaf24@8459 962 /*
kaf24@8459 963 * Initialise higher-level timer functions. We do this fairly late
kaf24@8459 964 * (post-SMP) because the time bases and scale factors need to be updated
kaf24@8459 965 * regularly, and SMP initialisation can cause a long delay with
kaf24@8459 966 * interrupts not yet enabled.
kaf24@8459 967 */
kaf24@8459 968 init_xen_time();
kaf24@8459 969
kaf24@8459 970 initialize_keytable();
kaf24@8459 971
keir@19543 972 console_init_postirq();
kaf24@8459 973
kaf24@8459 974 for_each_present_cpu ( i )
kaf24@8459 975 {
kaf24@8459 976 if ( num_online_cpus() >= max_cpus )
kaf24@8459 977 break;
kaf24@8459 978 if ( !cpu_online(i) )
kaf24@13662 979 {
kaf24@13662 980 rcu_online_cpu(i);
kaf24@8459 981 __cpu_up(i);
kaf24@13662 982 }
kfraser@11971 983
kfraser@11998 984 /* Set up cpu_to_node[]. */
kfraser@11971 985 srat_detect_node(i);
kfraser@11998 986 /* Set up node_to_cpumask based on cpu_to_node[]. */
kfraser@11971 987 numa_add_cpu(i);
kaf24@8459 988 }
kaf24@8459 989
kaf24@8459 990 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
kaf24@8459 991 smp_cpus_done(max_cpus);
kaf24@8459 992
kaf24@9117 993 initialise_gdb(); /* could be moved earlier */
kaf24@9117 994
kaf24@8459 995 do_initcalls();
kaf24@8459 996
kaf24@8594 997 if ( opt_watchdog )
kaf24@8594 998 watchdog_enable();
keir@19259 999
keir@19259 1000 if ( !tboot_protect_mem_regions() )
keir@19259 1001 panic("Could not protect TXT memory regions\n");
kaf24@8459 1002
kaf24@8459 1003 /* Create initial domain 0. */
keir@19266 1004 dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
keir@19788 1005 if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
kaf24@8459 1006 panic("Error creating domain 0\n");
kaf24@8459 1007
kfraser@12210 1008 dom0->is_privileged = 1;
keir@16856 1009 dom0->target = NULL;
kfraser@12210 1010
kaf24@8459 1011 /* Grab the DOM0 command line. */
kaf24@8459 1012 cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
kfraser@15426 1013 if ( (cmdline != NULL) || (kextra != NULL) )
kaf24@8459 1014 {
kaf24@8459 1015 static char dom0_cmdline[MAX_GUEST_CMDLINE];
kaf24@8459 1016
kfraser@15796 1017 cmdline = cmdline_cook(cmdline);
kfraser@15796 1018 safe_strcpy(dom0_cmdline, cmdline);
kaf24@8459 1019
kfraser@15426 1020 if ( kextra != NULL )
kfraser@15426 1021 /* kextra always includes exactly one leading space. */
kfraser@15426 1022 safe_strcat(dom0_cmdline, kextra);
kfraser@15426 1023
kaf24@8459 1024 /* Append any extra parameters. */
kfraser@13691 1025 if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
kfraser@13691 1026 safe_strcat(dom0_cmdline, " noapic");
kaf24@8459 1027 if ( acpi_skip_timer_override &&
kfraser@13691 1028 !strstr(dom0_cmdline, "acpi_skip_timer_override") )
kfraser@13691 1029 safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
keir@16165 1030 if ( (strlen(acpi_param) == 0) && acpi_disabled )
keir@16165 1031 {
keir@16165 1032 printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
keir@16165 1033 safe_strcpy(acpi_param, "off");
keir@16165 1034 }
kfraser@13691 1035 if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
kaf24@8459 1036 {
kfraser@13691 1037 safe_strcat(dom0_cmdline, " acpi=");
kfraser@13691 1038 safe_strcat(dom0_cmdline, acpi_param);
kaf24@8459 1039 }
kfraser@13691 1040
kfraser@13691 1041 cmdline = dom0_cmdline;
kaf24@8459 1042 }
kaf24@8459 1043
kaf24@8459 1044 if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
kaf24@8459 1045 {
keir@19544 1046 _initrd_start = mod[initrdidx].mod_start;
kaf24@8459 1047 _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
kaf24@8459 1048 }
kaf24@8459 1049
keir@18180 1050 if ( xen_cpuidle )
keir@18180 1051 xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
keir@18180 1052
kaf24@8459 1053 /*
kaf24@8459 1054 * We're going to setup domain0 using the module(s) that we stashed safely
kaf24@8459 1055 * above our heap. The second module, if present, is an initrd ramdisk.
kaf24@8459 1056 */
kaf24@8459 1057 if ( construct_dom0(dom0,
keir@19076 1058 initial_images_base,
keir@19076 1059 initial_images_start,
kaf24@8459 1060 mod[0].mod_end-mod[0].mod_start,
kaf24@8459 1061 _initrd_start,
kaf24@8459 1062 _initrd_len,
kaf24@8459 1063 cmdline) != 0)
kaf24@8459 1064 panic("Could not set up DOM0 guest OS\n");
kaf24@8459 1065
kaf24@8459 1066 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
kaf24@8459 1067 scrub_heap_pages();
kaf24@8459 1068
kaf24@8459 1069 init_trace_bufs();
kaf24@8459 1070
keir@19646 1071 init_tmem();
keir@19646 1072
kaf24@10502 1073 console_endboot();
kaf24@8459 1074
kaf24@8459 1075 /* Hide UART from DOM0 if we're using it */
kaf24@8459 1076 serial_endboot();
kaf24@8459 1077
kaf24@8459 1078 domain_unpause_by_systemcontroller(dom0);
kaf24@8459 1079
keir@15082 1080 reset_stack_and_jump(init_done);
kaf24@8459 1081 }
kaf24@8459 1082
ian@13763 1083 void arch_get_xen_caps(xen_capabilities_info_t *info)
iap10@6721 1084 {
kfraser@14997 1085 /* Interface name is always xen-3.0-* for Xen-3.x. */
kfraser@14997 1086 int major = 3, minor = 0;
keir@13754 1087 char s[32];
keir@13754 1088
ian@13763 1089 (*info)[0] = '\0';
iap10@6721 1090
keir@17618 1091 #if defined(CONFIG_X86_32)
kaf24@6725 1092
keir@13754 1093 snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
ian@13763 1094 safe_strcat(*info, s);
kaf24@6725 1095 if ( hvm_enabled )
iap10@6721 1096 {
keir@13754 1097 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1098 safe_strcat(*info, s);
keir@13754 1099 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
ian@13763 1100 safe_strcat(*info, s);
iap10@6721 1101 }
iap10@6721 1102
kaf24@6725 1103 #elif defined(CONFIG_X86_64)
iap10@6721 1104
keir@13754 1105 snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor);
ian@13763 1106 safe_strcat(*info, s);
ack@13288 1107 #ifdef CONFIG_COMPAT
keir@13754 1108 snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
ian@13763 1109 safe_strcat(*info, s);
ack@13288 1110 #endif
kaf24@6725 1111 if ( hvm_enabled )
iap10@6721 1112 {
keir@13754 1113 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1114 safe_strcat(*info, s);
keir@13754 1115 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
ian@13763 1116 safe_strcat(*info, s);
keir@13754 1117 snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor);
ian@13763 1118 safe_strcat(*info, s);
iap10@6721 1119 }
kaf24@6725 1120
iap10@6721 1121 #endif
iap10@6721 1122 }
iap10@6721 1123
keir@17738 1124 int xen_in_range(paddr_t start, paddr_t end)
keir@17729 1125 {
keir@19283 1126 int i;
keir@19283 1127 static struct {
keir@19283 1128 paddr_t s, e;
keir@19568 1129 } xen_regions[4];
keir@19055 1130
keir@19283 1131 /* initialize first time */
keir@19283 1132 if ( !xen_regions[0].s )
keir@19283 1133 {
keir@19577 1134 extern char __init_begin[], __bss_start[];
keir@19283 1135 extern unsigned long allocator_bitmap_end;
keir@19283 1136
keir@19283 1137 /* S3 resume code (and other real mode trampoline code) */
keir@19283 1138 xen_regions[0].s = bootsym_phys(trampoline_start);
keir@19283 1139 xen_regions[0].e = bootsym_phys(trampoline_end);
keir@19283 1140 /* hypervisor code + data */
keir@19283 1141 xen_regions[1].s =__pa(&_stext);
keir@19283 1142 xen_regions[1].e = __pa(&__init_begin);
keir@19283 1143 /* per-cpu data */
keir@19283 1144 xen_regions[2].s = __pa(&__per_cpu_start);
keir@19577 1145 xen_regions[2].e = xen_regions[2].s +
keir@19577 1146 (((paddr_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
keir@19283 1147 /* bss + boot allocator bitmap */
keir@19283 1148 xen_regions[3].s = __pa(&__bss_start);
keir@19283 1149 xen_regions[3].e = allocator_bitmap_end;
keir@19283 1150 }
keir@19283 1151
keir@19283 1152 for ( i = 0; i < ARRAY_SIZE(xen_regions); i++ )
keir@19283 1153 {
keir@19283 1154 if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
keir@19283 1155 return 1;
keir@19283 1156 }
keir@19283 1157
keir@19283 1158 return 0;
keir@17729 1159 }
keir@17729 1160
kaf24@3914 1161 /*
kaf24@3914 1162 * Local variables:
kaf24@3914 1163 * mode: C
kaf24@3914 1164 * c-set-style: "BSD"
kaf24@3914 1165 * c-basic-offset: 4
kaf24@3914 1166 * tab-width: 4
kaf24@3914 1167 * indent-tabs-mode: nil
kaf24@3988 1168 * End:
kaf24@3914 1169 */