ia64/xen-unstable

annotate xen/arch/x86/setup.c @ 19283:73c4e6dbbb60

vt-d: Better restrict memory ranges considered to be in Xen

The current implementation of xen_in_range() misses several memory
ranges that are used by the hypervisor and thus shouldn't get mapped
into dom0's VT-d tables. This patch should make the check complete.

This patch is only against x86 because I'm not familiar enough with
IA64 to know how much, if any, of these checks apply there.

Signed-off-by: Joseph Cihula <joseph.cihula@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Mar 06 19:06:30 2009 +0000 (2009-03-06)
parents 43019597f85c
children 11650ecdd31e
rev   line source
kaf24@1452 1 #include <xen/config.h>
kaf24@1452 2 #include <xen/init.h>
kaf24@1452 3 #include <xen/lib.h>
kaf24@1452 4 #include <xen/sched.h>
cl349@5247 5 #include <xen/domain.h>
kaf24@1452 6 #include <xen/serial.h>
kaf24@1506 7 #include <xen/softirq.h>
kaf24@1452 8 #include <xen/acpi.h>
kaf24@3338 9 #include <xen/console.h>
iap10@4287 10 #include <xen/serial.h>
kaf24@3338 11 #include <xen/trace.h>
kaf24@3338 12 #include <xen/multiboot.h>
kaf24@5356 13 #include <xen/domain_page.h>
kfraser@10890 14 #include <xen/version.h>
kaf24@9117 15 #include <xen/gdbstub.h>
kaf24@9818 16 #include <xen/percpu.h>
kfraser@11296 17 #include <xen/hypercall.h>
kfraser@11601 18 #include <xen/keyhandler.h>
kfraser@11971 19 #include <xen/numa.h>
kaf24@13662 20 #include <xen/rcupdate.h>
keir@15298 21 #include <xen/vga.h>
keir@15988 22 #include <xen/dmi.h>
iap10@6721 23 #include <public/version.h>
ack@13291 24 #ifdef CONFIG_COMPAT
ack@13291 25 #include <compat/platform.h>
ack@13291 26 #include <compat/xen.h>
ack@13291 27 #endif
kaf24@1452 28 #include <asm/bitops.h>
kaf24@1452 29 #include <asm/smp.h>
kaf24@1452 30 #include <asm/processor.h>
kaf24@1452 31 #include <asm/mpspec.h>
kaf24@1452 32 #include <asm/apic.h>
kaf24@1452 33 #include <asm/desc.h>
Tim@13909 34 #include <asm/paging.h>
kaf24@3344 35 #include <asm/e820.h>
kfraser@15819 36 #include <xsm/acm/acm_hooks.h>
ian@12677 37 #include <xen/kexec.h>
kfraser@15336 38 #include <asm/edd.h>
kfraser@15815 39 #include <xsm/xsm.h>
keir@16274 40 #include <asm/tboot.h>
kaf24@3338 41
keir@19076 42 int __init bzimage_headroom(char *image_start, unsigned long image_length);
keir@19076 43
kfraser@15074 44 #if defined(CONFIG_X86_64)
kfraser@15597 45 #define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
kfraser@15074 46 #define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
kfraser@15074 47 #else
kfraser@15597 48 #define BOOTSTRAP_DIRECTMAP_END (1UL << 30) /* 1GB */
kfraser@15074 49 #define maddr_to_bootstrap_virt(m) ((void *)(long)(m))
kfraser@15074 50 #endif
kfraser@15074 51
kaf24@5211 52 extern void generic_apic_probe(void);
kfraser@11971 53 extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
kaf24@5157 54
kfraser@15330 55 extern u16 boot_edid_caps;
kfraser@15330 56 extern u8 boot_edid_info[128];
kfraser@15330 57 extern struct boot_video_info boot_vid_info;
kfraser@15330 58
kaf24@5146 59 /* opt_nosmp: If true, secondary processors are ignored. */
kaf24@5900 60 static int opt_nosmp = 0;
kaf24@5146 61 boolean_param("nosmp", opt_nosmp);
kaf24@5146 62
kaf24@5146 63 /* maxcpus: maximum number of CPUs to activate. */
kaf24@5146 64 static unsigned int max_cpus = NR_CPUS;
shand@11156 65 integer_param("maxcpus", max_cpus);
kaf24@5146 66
kaf24@3334 67 /* opt_watchdog: If true, run a watchdog NMI on each processor. */
kaf24@3334 68 static int opt_watchdog = 0;
kaf24@3334 69 boolean_param("watchdog", opt_watchdog);
kaf24@3334 70
kaf24@4850 71 /* **** Linux config option: propagated to domain0. */
kaf24@4850 72 /* "acpi=off": Disables both ACPI table parsing and interpreter. */
kaf24@4850 73 /* "acpi=force": Override the disable blacklist. */
kaf24@4850 74 /* "acpi=strict": Disables out-of-spec workarounds. */
kaf24@4850 75 /* "acpi=ht": Limit ACPI just to boot-time to enable HT. */
kaf24@4850 76 /* "acpi=noirq": Disables ACPI interrupt routing. */
kaf24@4850 77 static void parse_acpi_param(char *s);
kaf24@4850 78 custom_param("acpi", parse_acpi_param);
kaf24@4850 79
kaf24@4850 80 /* **** Linux config option: propagated to domain0. */
kaf24@4850 81 /* acpi_skip_timer_override: Skip IRQ0 overrides. */
kaf24@4850 82 extern int acpi_skip_timer_override;
kaf24@4850 83 boolean_param("acpi_skip_timer_override", acpi_skip_timer_override);
kaf24@4850 84
kaf24@4850 85 /* **** Linux config option: propagated to domain0. */
kaf24@4850 86 /* noapic: Disable IOAPIC setup. */
kaf24@4850 87 extern int skip_ioapic_setup;
kaf24@4850 88 boolean_param("noapic", skip_ioapic_setup);
kaf24@4850 89
keir@17546 90 /* **** Linux config option: propagated to domain0. */
keir@17657 91 /* xen_cpuidle: xen control cstate. */
keir@18051 92 /*static*/ int xen_cpuidle;
keir@17657 93 boolean_param("cpuidle", xen_cpuidle);
keir@17546 94
kaf24@3594 95 int early_boot = 1;
kaf24@3594 96
kaf24@5146 97 cpumask_t cpu_present_map;
kaf24@5146 98
kfraser@15074 99 unsigned long xen_phys_start;
keir@19266 100 unsigned long allocator_bitmap_end;
kfraser@15074 101
keir@19055 102 #ifdef CONFIG_X86_32
kaf24@5003 103 /* Limits of Xen heap, used to initialise the allocator. */
keir@19061 104 unsigned long xenheap_initial_phys_start, xenheap_phys_end;
keir@19055 105 #endif
kaf24@3338 106
kaf24@2298 107 extern void arch_init_memory(void);
kaf24@1589 108 extern void init_IRQ(void);
kaf24@5604 109 extern void early_time_init(void);
kaf24@5167 110 extern void early_cpu_init(void);
kfraser@15747 111 extern void vesa_init(void);
kfraser@15747 112 extern void vesa_mtrr_init(void);
kaf24@1589 113
keir@18523 114 DEFINE_PER_CPU(struct desc_struct *, gdt_table) = boot_cpu_gdt_table;
keir@18523 115 #ifdef CONFIG_COMPAT
keir@18523 116 DEFINE_PER_CPU(struct desc_struct *, compat_gdt_table)
keir@18523 117 = boot_cpu_compat_gdt_table;
keir@18523 118 #endif
keir@18523 119
kaf24@8533 120 struct tss_struct init_tss[NR_CPUS];
kaf24@8533 121
kfraser@15490 122 char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
kaf24@5011 123
keir@16144 124 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
kaf24@1452 125
kaf24@1670 126 unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
kaf24@1452 127 EXPORT_SYMBOL(mmu_cr4_features);
kaf24@1452 128
kaf24@4818 129 int acpi_disabled;
kaf24@1452 130
kaf24@4850 131 int acpi_force;
kaf24@4850 132 char acpi_param[10] = "";
kfraser@15074 133 static void __init parse_acpi_param(char *s)
kaf24@4850 134 {
kaf24@4850 135 /* Save the parameter so it can be propagated to domain0. */
kfraser@13689 136 safe_strcpy(acpi_param, s);
kaf24@4850 137
kaf24@4850 138 /* Interpret the parameter for use within Xen. */
kaf24@4850 139 if ( !strcmp(s, "off") )
kaf24@4850 140 {
kaf24@4850 141 disable_acpi();
kaf24@4850 142 }
kaf24@4850 143 else if ( !strcmp(s, "force") )
kaf24@4850 144 {
kaf24@4850 145 acpi_force = 1;
kaf24@4850 146 acpi_ht = 1;
kaf24@4850 147 acpi_disabled = 0;
kaf24@4850 148 }
kaf24@4850 149 else if ( !strcmp(s, "strict") )
kaf24@4850 150 {
kaf24@4850 151 acpi_strict = 1;
kaf24@4850 152 }
kaf24@4850 153 else if ( !strcmp(s, "ht") )
kaf24@4850 154 {
kaf24@4850 155 if ( !acpi_force )
kaf24@4850 156 disable_acpi();
kaf24@4850 157 acpi_ht = 1;
kaf24@4850 158 }
kaf24@4850 159 else if ( !strcmp(s, "noirq") )
kaf24@4850 160 {
kaf24@4850 161 acpi_noirq_set();
kaf24@4850 162 }
kaf24@4850 163 }
kaf24@4850 164
kaf24@1452 165 static void __init do_initcalls(void)
kaf24@1452 166 {
kaf24@1452 167 initcall_t *call;
kaf24@1452 168 for ( call = &__initcall_start; call < &__initcall_end; call++ )
kaf24@1452 169 (*call)();
kaf24@1452 170 }
kaf24@1452 171
/* Print a printk()-style message, then loop calling halt(); never falls through. */
#define EARLY_FAIL(fmt, args...) do {   \
    printk(fmt , ## args);              \
    while ( 1 )                         \
        halt();                         \
} while ( 0 )
kaf24@8459 176
keir@19076 177 static unsigned long __initdata initial_images_base;
keir@19076 178 static unsigned long __initdata initial_images_start;
keir@19076 179 static unsigned long __initdata initial_images_end;
kaf24@9067 180
kfraser@15074 181 unsigned long __init initial_images_nrpages(void)
kaf24@9067 182 {
keir@19076 183 ASSERT(!(initial_images_base & ~PAGE_MASK));
kfraser@15489 184 ASSERT(!(initial_images_end & ~PAGE_MASK));
kfraser@15489 185 return ((initial_images_end >> PAGE_SHIFT) -
keir@19076 186 (initial_images_base >> PAGE_SHIFT));
kaf24@9067 187 }
kaf24@9067 188
kfraser@15074 189 void __init discard_initial_images(void)
kaf24@9067 190 {
keir@19076 191 init_domheap_pages(initial_images_base, initial_images_end);
kaf24@9067 192 }
kaf24@9067 193
kaf24@9818 194 extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
kaf24@9818 195
kfraser@11241 196 static void __init percpu_init_areas(void)
kaf24@9818 197 {
kaf24@9818 198 unsigned int i, data_size = __per_cpu_data_end - __per_cpu_start;
kfraser@15074 199 unsigned int first_unused;
kaf24@9818 200
kaf24@9818 201 BUG_ON(data_size > PERCPU_SIZE);
kaf24@9818 202
kfraser@15074 203 /* Initialise per-cpu data area for all possible secondary CPUs. */
kfraser@15074 204 for ( i = 1; (i < NR_CPUS) && cpu_possible(i); i++ )
kfraser@15074 205 memcpy(__per_cpu_start + (i << PERCPU_SHIFT),
kfraser@15074 206 __per_cpu_start,
kfraser@15074 207 data_size);
kaf24@9818 208 first_unused = i;
kaf24@9818 209
kfraser@14340 210 /* Check that there are no holes in cpu_possible_map. */
kaf24@9818 211 for ( ; i < NR_CPUS; i++ )
kfraser@14340 212 BUG_ON(cpu_possible(i));
kaf24@9818 213
kfraser@11241 214 #ifndef MEMORY_GUARD
kaf24@9818 215 init_xenheap_pages(__pa(__per_cpu_start) + (first_unused << PERCPU_SHIFT),
kaf24@9818 216 __pa(__per_cpu_end));
kfraser@11241 217 #endif
keir@15082 218 memguard_guard_range(&__per_cpu_start[first_unused << PERCPU_SHIFT],
keir@15082 219 (NR_CPUS - first_unused) << PERCPU_SHIFT);
keir@15082 220 #if defined(CONFIG_X86_64)
keir@15082 221 /* Also zap the mapping in the 1:1 area. */
keir@15082 222 memguard_guard_range(__va(__pa(__per_cpu_start)) +
keir@15082 223 (first_unused << PERCPU_SHIFT),
keir@15082 224 (NR_CPUS - first_unused) << PERCPU_SHIFT);
keir@15082 225 #endif
kaf24@9818 226 }
kaf24@9818 227
kfraser@11241 228 static void __init init_idle_domain(void)
kfraser@11240 229 {
kfraser@11240 230 struct domain *idle_domain;
kfraser@11240 231
kfraser@11240 232 /* Domain creation requires that scheduler structures are initialised. */
kfraser@11240 233 scheduler_init();
kfraser@11240 234
kfraser@14911 235 idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
kfraser@11240 236 if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
kfraser@11240 237 BUG();
kfraser@11240 238
kfraser@11240 239 set_current(idle_domain->vcpu[0]);
kfraser@11240 240 idle_vcpu[0] = this_cpu(curr_vcpu) = current;
kfraser@11240 241
kfraser@11240 242 setup_idle_pagetable();
kfraser@11240 243 }
kfraser@11240 244
kfraser@15074 245 static void __init srat_detect_node(int cpu)
kfraser@11971 246 {
kfraser@11998 247 unsigned node;
keir@17551 248 u32 apicid = x86_cpu_to_apicid[cpu];
kfraser@11971 249
kfraser@11998 250 node = apicid_to_node[apicid];
kfraser@11998 251 if ( node == NUMA_NO_NODE )
kfraser@11998 252 node = 0;
kfraser@11998 253 numa_set_node(cpu, node);
kfraser@11971 254
kfraser@11998 255 if ( acpi_numa > 0 )
kfraser@11998 256 printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node);
kfraser@11971 257 }
kfraser@11971 258
kfraser@15740 259 /*
kfraser@15740 260 * Ensure a given physical memory range is present in the bootstrap mappings.
kfraser@15740 261 * Use superpage mappings to ensure that pagetable memory needn't be allocated.
kfraser@15740 262 */
kfraser@15740 263 static void __init bootstrap_map(unsigned long start, unsigned long end)
kfraser@15740 264 {
kfraser@15740 265 unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
keir@16888 266 start = max_t(unsigned long, start & ~mask, 16UL << 20);
kfraser@15740 267 end = (end + mask) & ~mask;
keir@16888 268 if ( start >= end )
keir@16888 269 return;
kfraser@15740 270 if ( end > BOOTSTRAP_DIRECTMAP_END )
kfraser@15740 271 panic("Cannot access memory beyond end of "
kfraser@15740 272 "bootstrap direct-map area\n");
kfraser@15740 273 map_pages_to_xen(
kfraser@15740 274 (unsigned long)maddr_to_bootstrap_virt(start),
kfraser@15740 275 start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kfraser@15740 276 }
kfraser@15740 277
kfraser@15074 278 static void __init move_memory(
kfraser@15074 279 unsigned long dst, unsigned long src_start, unsigned long src_end)
ian@12677 280 {
kfraser@15740 281 bootstrap_map(src_start, src_end);
kfraser@15740 282 bootstrap_map(dst, dst + src_end - src_start);
kfraser@15074 283 memmove(maddr_to_bootstrap_virt(dst),
kfraser@15074 284 maddr_to_bootstrap_virt(src_start),
ian@12677 285 src_end - src_start);
kfraser@15074 286 }
kfraser@15074 287
kfraser@15074 288 /* A temporary copy of the e820 map that we can mess with during bootstrap. */
kfraser@15074 289 static struct e820map __initdata boot_e820;
kfraser@15074 290
keir@15298 291 struct boot_video_info {
keir@15298 292 u8 orig_x; /* 0x00 */
keir@15298 293 u8 orig_y; /* 0x01 */
keir@15298 294 u8 orig_video_mode; /* 0x02 */
keir@15298 295 u8 orig_video_cols; /* 0x03 */
keir@15298 296 u8 orig_video_lines; /* 0x04 */
keir@15298 297 u8 orig_video_isVGA; /* 0x05 */
keir@15298 298 u16 orig_video_points; /* 0x06 */
keir@15298 299
keir@15298 300 /* VESA graphic mode -- linear frame buffer */
keir@15298 301 u32 capabilities; /* 0x08 */
keir@15298 302 u16 lfb_linelength; /* 0x0c */
keir@15298 303 u16 lfb_width; /* 0x0e */
keir@15298 304 u16 lfb_height; /* 0x10 */
keir@15298 305 u16 lfb_depth; /* 0x12 */
keir@15298 306 u32 lfb_base; /* 0x14 */
keir@15298 307 u32 lfb_size; /* 0x18 */
keir@15298 308 u8 red_size; /* 0x1c */
keir@15298 309 u8 red_pos; /* 0x1d */
keir@15298 310 u8 green_size; /* 0x1e */
keir@15298 311 u8 green_pos; /* 0x1f */
keir@15298 312 u8 blue_size; /* 0x20 */
keir@15298 313 u8 blue_pos; /* 0x21 */
keir@15298 314 u8 rsvd_size; /* 0x22 */
keir@15298 315 u8 rsvd_pos; /* 0x23 */
keir@15298 316 u16 vesapm_seg; /* 0x24 */
keir@15298 317 u16 vesapm_off; /* 0x26 */
keir@16124 318 u16 vesa_attrib; /* 0x28 */
keir@15298 319 };
keir@15298 320
keir@15298 321 static void __init parse_video_info(void)
keir@15298 322 {
keir@15298 323 struct boot_video_info *bvi = &bootsym(boot_vid_info);
keir@15298 324
keir@15298 325 if ( (bvi->orig_video_isVGA == 1) && (bvi->orig_video_mode == 3) )
keir@15298 326 {
keir@15298 327 vga_console_info.video_type = XEN_VGATYPE_TEXT_MODE_3;
keir@15298 328 vga_console_info.u.text_mode_3.font_height = bvi->orig_video_points;
keir@15298 329 vga_console_info.u.text_mode_3.cursor_x = bvi->orig_x;
keir@15298 330 vga_console_info.u.text_mode_3.cursor_y = bvi->orig_y;
keir@15298 331 vga_console_info.u.text_mode_3.rows = bvi->orig_video_lines;
keir@15298 332 vga_console_info.u.text_mode_3.columns = bvi->orig_video_cols;
keir@15298 333 }
keir@15298 334 else if ( bvi->orig_video_isVGA == 0x23 )
keir@15298 335 {
keir@15298 336 vga_console_info.video_type = XEN_VGATYPE_VESA_LFB;
keir@15298 337 vga_console_info.u.vesa_lfb.width = bvi->lfb_width;
keir@15298 338 vga_console_info.u.vesa_lfb.height = bvi->lfb_height;
keir@15298 339 vga_console_info.u.vesa_lfb.bytes_per_line = bvi->lfb_linelength;
keir@15298 340 vga_console_info.u.vesa_lfb.bits_per_pixel = bvi->lfb_depth;
keir@15298 341 vga_console_info.u.vesa_lfb.lfb_base = bvi->lfb_base;
keir@15298 342 vga_console_info.u.vesa_lfb.lfb_size = bvi->lfb_size;
keir@15298 343 vga_console_info.u.vesa_lfb.red_pos = bvi->red_pos;
keir@15298 344 vga_console_info.u.vesa_lfb.red_size = bvi->red_size;
keir@15298 345 vga_console_info.u.vesa_lfb.green_pos = bvi->green_pos;
keir@15298 346 vga_console_info.u.vesa_lfb.green_size = bvi->green_size;
keir@15298 347 vga_console_info.u.vesa_lfb.blue_pos = bvi->blue_pos;
keir@15298 348 vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
keir@15298 349 vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
keir@15298 350 vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
keir@16124 351 vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities;
keir@16124 352 vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib;
keir@15298 353 }
keir@15298 354 }
keir@15298 355
keir@16563 356 void __init kexec_reserve_area(struct e820map *e820)
keir@16563 357 {
keir@16563 358 unsigned long kdump_start = kexec_crash_area.start;
keir@16563 359 unsigned long kdump_size = kexec_crash_area.size;
keir@16563 360 static int is_reserved = 0;
keir@16563 361
keir@16563 362 kdump_size = (kdump_size + PAGE_SIZE - 1) & PAGE_MASK;
keir@16563 363
keir@16563 364 if ( (kdump_start == 0) || (kdump_size == 0) || is_reserved )
keir@16563 365 return;
keir@16563 366
keir@16563 367 is_reserved = 1;
keir@16563 368
keir@17674 369 if ( !reserve_e820_ram(e820, kdump_start, kdump_start + kdump_size) )
keir@16563 370 {
keir@16563 371 printk("Kdump: DISABLED (failed to reserve %luMB (%lukB) at 0x%lx)"
keir@16563 372 "\n", kdump_size >> 20, kdump_size >> 10, kdump_start);
keir@16563 373 kexec_crash_area.start = kexec_crash_area.size = 0;
keir@16563 374 }
keir@16563 375 else
keir@16563 376 {
keir@16563 377 printk("Kdump: %luMB (%lukB) at 0x%lx\n",
keir@16563 378 kdump_size >> 20, kdump_size >> 10, kdump_start);
keir@16563 379 }
keir@16563 380 }
keir@16563 381
/*
 * Final step of initialisation: dispose of the [__init_begin, __init_end)
 * region and enter the idle loop.
 *
 * The region is filled with 0xcc (int3), given to the Xen heap when
 * MEMORY_GUARD is not defined, and passed to memguard_guard_range().
 */
void init_done(void)
{
    extern char __init_begin[], __init_end[];
    long init_bytes = __init_end - __init_begin;

    /* Poison the init areas, then free (or page-protect) them. */
    memset(__init_begin, 0xcc, init_bytes);
#ifndef MEMORY_GUARD
    init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
#endif
    memguard_guard_range(__init_begin, init_bytes);
#if defined(CONFIG_X86_64)
    /* The same range is also guarded through its 1:1 (__va) alias. */
    memguard_guard_range(__va(__pa(__init_begin)), init_bytes);
#endif
    printk("Freed %ldkB init memory.\n", init_bytes >> 10);

    startup_cpu_idle_loop();
}
keir@15082 400
kfraser@15796 401 static char * __init cmdline_cook(char *p)
kfraser@15796 402 {
kfraser@15796 403 p = p ? : "";
kfraser@15796 404 while ( *p == ' ' )
kfraser@15796 405 p++;
kfraser@15796 406 while ( (*p != ' ') && (*p != '\0') )
kfraser@15796 407 p++;
kfraser@15796 408 while ( *p == ' ' )
kfraser@15796 409 p++;
kfraser@15796 410 return p;
kfraser@15796 411 }
kfraser@15796 412
kfraser@15379 413 void __init __start_xen(unsigned long mbi_p)
kaf24@1452 414 {
kfraser@15293 415 char *memmap_type = NULL;
kfraser@15796 416 char *cmdline, *kextra;
kaf24@8457 417 unsigned long _initrd_start = 0, _initrd_len = 0;
kaf24@8457 418 unsigned int initrdidx = 1;
kfraser@15379 419 multiboot_info_t *mbi = __va(mbi_p);
kaf24@8457 420 module_t *mod = (module_t *)__va(mbi->mods_addr);
keir@19135 421 unsigned long nr_pages, modules_length, modules_headroom;
kfraser@15293 422 int i, e820_warn = 0, bytes = 0;
kaf24@5776 423 struct ns16550_defaults ns16550 = {
kaf24@5776 424 .data_bits = 8,
kaf24@5776 425 .parity = 'n',
kaf24@5776 426 .stop_bits = 1
kaf24@5776 427 };
kaf24@3338 428
kfraser@12853 429 extern void early_page_fault(void);
kfraser@12853 430 set_intr_gate(TRAP_page_fault, &early_page_fault);
kfraser@12853 431
kaf24@3338 432 /* Parse the command-line options. */
kfraser@15796 433 cmdline = cmdline_cook((mbi->flags & MBI_CMDLINE) ?
kfraser@15796 434 __va(mbi->cmdline) : NULL);
kfraser@15426 435 if ( (kextra = strstr(cmdline, " -- ")) != NULL )
kfraser@15426 436 {
kfraser@15426 437 /*
kfraser@15426 438 * Options after ' -- ' separator belong to dom0.
kfraser@15426 439 * 1. Orphan dom0's options from Xen's command line.
kfraser@15426 440 * 2. Skip all but final leading space from dom0's options.
kfraser@15426 441 */
kfraser@15426 442 *kextra = '\0';
kfraser@15426 443 kextra += 3;
kfraser@15426 444 while ( kextra[1] == ' ' ) kextra++;
kfraser@15426 445 }
kaf24@9823 446 cmdline_parse(cmdline);
kaf24@3338 447
keir@15298 448 parse_video_info();
keir@15298 449
kaf24@8534 450 set_current((struct vcpu *)0xfffff000); /* debug sanity */
keir@18790 451 idle_vcpu[0] = current;
kaf24@8534 452 set_processor_id(0); /* needed early, for smp_processor_id() */
keir@16378 453 if ( cpu_has_efer )
keir@16378 454 rdmsrl(MSR_EFER, this_cpu(efer));
keir@16267 455 asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
kaf24@3338 456
kaf24@5146 457 smp_prepare_boot_cpu();
kaf24@5146 458
kaf24@3338 459 /* We initialise the serial devices very early so we can get debugging. */
kaf24@5776 460 ns16550.io_base = 0x3f8;
kaf24@5776 461 ns16550.irq = 4;
kaf24@5776 462 ns16550_init(0, &ns16550);
kaf24@5776 463 ns16550.io_base = 0x2f8;
kaf24@5776 464 ns16550.irq = 3;
kaf24@5776 465 ns16550_init(1, &ns16550);
kaf24@5195 466 serial_init_preirq();
kaf24@3338 467
kaf24@3338 468 init_console();
kaf24@3338 469
kfraser@11947 470 printk("Command line: %s\n", cmdline);
kaf24@9823 471
kfraser@15330 472 printk("Video information:\n");
kfraser@15330 473
kfraser@15330 474 /* Print VGA display mode information. */
keir@15298 475 switch ( vga_console_info.video_type )
keir@15298 476 {
keir@15298 477 case XEN_VGATYPE_TEXT_MODE_3:
kfraser@15330 478 printk(" VGA is text mode %dx%d, font 8x%d\n",
keir@15298 479 vga_console_info.u.text_mode_3.columns,
keir@15298 480 vga_console_info.u.text_mode_3.rows,
keir@15298 481 vga_console_info.u.text_mode_3.font_height);
keir@15298 482 break;
keir@15298 483 case XEN_VGATYPE_VESA_LFB:
kfraser@15330 484 printk(" VGA is graphics mode %dx%d, %d bpp\n",
keir@15298 485 vga_console_info.u.vesa_lfb.width,
keir@15298 486 vga_console_info.u.vesa_lfb.height,
keir@15298 487 vga_console_info.u.vesa_lfb.bits_per_pixel);
keir@15298 488 break;
kfraser@15330 489 default:
kfraser@15330 490 printk(" No VGA detected\n");
kfraser@15330 491 break;
kfraser@15330 492 }
kfraser@15330 493
kfraser@15330 494 /* Print VBE/DDC EDID information. */
kfraser@15330 495 if ( bootsym(boot_edid_caps) != 0x1313 )
kfraser@15330 496 {
kfraser@15330 497 u16 caps = bootsym(boot_edid_caps);
kfraser@15330 498 printk(" VBE/DDC methods:%s%s%s; ",
kfraser@15330 499 (caps & 1) ? " V1" : "",
kfraser@15330 500 (caps & 2) ? " V2" : "",
kfraser@15330 501 !(caps & 3) ? " none" : "");
kfraser@15330 502 printk("EDID transfer time: %d seconds\n", caps >> 8);
kfraser@15330 503 if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
kfraser@15330 504 {
kfraser@15330 505 printk(" EDID info not retrieved because ");
kfraser@15330 506 if ( !(caps & 3) )
kfraser@15330 507 printk("no DDC retrieval method detected\n");
kfraser@15330 508 else if ( (caps >> 8) > 5 )
kfraser@15330 509 printk("takes longer than 5 seconds\n");
kfraser@15330 510 else
kfraser@15330 511 printk("of reasons unknown\n");
kfraser@15330 512 }
keir@15298 513 }
keir@15298 514
kfraser@15336 515 printk("Disc information:\n");
kfraser@15336 516 printk(" Found %d MBR signatures\n",
kfraser@15430 517 bootsym(boot_mbr_signature_nr));
kfraser@15336 518 printk(" Found %d EDD information structures\n",
kfraser@15336 519 bootsym(boot_edd_info_nr));
kfraser@15336 520
kaf24@3344 521 /* Check that we have at least one Multiboot module. */
kaf24@3344 522 if ( !(mbi->flags & MBI_MODULES) || (mbi->mods_count == 0) )
kfraser@15074 523 EARLY_FAIL("dom0 kernel not specified. "
kfraser@15074 524 "Check bootloader configuration.\n");
kaf24@5011 525
kaf24@5011 526 if ( ((unsigned long)cpu0_stack & (STACK_SIZE-1)) != 0 )
kfraser@15074 527 EARLY_FAIL("Misaligned CPU0 stack.\n");
kaf24@3338 528
kfraser@15293 529 if ( e820_raw_nr != 0 )
kfraser@15292 530 {
kfraser@15293 531 memmap_type = "Xen-e820";
kfraser@15292 532 }
kfraser@15293 533 else if ( bootsym(lowmem_kb) )
kfraser@15292 534 {
kfraser@15293 535 memmap_type = "Xen-e801";
kfraser@15292 536 e820_raw[0].addr = 0;
kfraser@15293 537 e820_raw[0].size = bootsym(lowmem_kb) << 10;
kfraser@15292 538 e820_raw[0].type = E820_RAM;
kfraser@15292 539 e820_raw[1].addr = 0x100000;
kfraser@15293 540 e820_raw[1].size = bootsym(highmem_kb) << 10;
kfraser@15292 541 e820_raw[1].type = E820_RAM;
kfraser@15292 542 e820_raw_nr = 2;
kfraser@15292 543 }
kfraser@15292 544 else if ( mbi->flags & MBI_MEMMAP )
kaf24@3344 545 {
kfraser@15293 546 memmap_type = "Multiboot-e820";
keir@15988 547 while ( (bytes < mbi->mmap_length) && (e820_raw_nr < E820MAX) )
kaf24@3344 548 {
kaf24@3344 549 memory_map_t *map = __va(mbi->mmap_addr + bytes);
kaf24@8402 550
kaf24@8402 551 /*
kaf24@8403 552 * This is a gross workaround for a BIOS bug. Some bootloaders do
kaf24@8402 553 * not write e820 map entries into pre-zeroed memory. This is
kaf24@8402 554 * okay if the BIOS fills in all fields of the map entry, but
kaf24@8402 555 * some broken BIOSes do not bother to write the high word of
kaf24@8402 556 * the length field if the length is smaller than 4GB. We
kaf24@8402 557 * detect and fix this by flagging sections below 4GB that
kaf24@8403 558 * appear to be larger than 4GB in size.
kaf24@8402 559 */
kaf24@8403 560 if ( (map->base_addr_high == 0) && (map->length_high != 0) )
kaf24@8402 561 {
kfraser@15292 562 if ( !e820_warn )
kfraser@15292 563 {
kfraser@15292 564 printk("WARNING: Buggy e820 map detected and fixed "
kfraser@15292 565 "(truncated length fields).\n");
kfraser@15292 566 e820_warn = 1;
kfraser@15292 567 }
kaf24@8402 568 map->length_high = 0;
kaf24@8402 569 }
kaf24@8402 570
kaf24@3344 571 e820_raw[e820_raw_nr].addr =
kaf24@3344 572 ((u64)map->base_addr_high << 32) | (u64)map->base_addr_low;
kaf24@3344 573 e820_raw[e820_raw_nr].size =
kaf24@3344 574 ((u64)map->length_high << 32) | (u64)map->length_low;
kfraser@15799 575 e820_raw[e820_raw_nr].type = map->type;
kaf24@3344 576 e820_raw_nr++;
kaf24@8402 577
kaf24@3344 578 bytes += map->size + 4;
kaf24@3344 579 }
kaf24@3344 580 }
kaf24@3344 581 else if ( mbi->flags & MBI_MEMLIMITS )
kaf24@3344 582 {
kfraser@15293 583 memmap_type = "Multiboot-e801";
kaf24@3344 584 e820_raw[0].addr = 0;
kaf24@3344 585 e820_raw[0].size = mbi->mem_lower << 10;
kaf24@3344 586 e820_raw[0].type = E820_RAM;
kaf24@3354 587 e820_raw[1].addr = 0x100000;
kaf24@3354 588 e820_raw[1].size = mbi->mem_upper << 10;
kaf24@3354 589 e820_raw[1].type = E820_RAM;
kaf24@3344 590 e820_raw_nr = 2;
kaf24@3344 591 }
kaf24@3344 592 else
kaf24@3344 593 {
kfraser@15074 594 EARLY_FAIL("Bootloader provided no memory information.\n");
kaf24@3344 595 }
kaf24@3344 596
kaf24@13427 597 /* Sanitise the raw E820 map to produce a final clean version. */
kfraser@15293 598 max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);
kaf24@3338 599
keir@16563 600 /* Create a temporary copy of the E820 map. */
kfraser@15074 601 memcpy(&boot_e820, &e820, sizeof(e820));
keir@16563 602
keir@16563 603 /* Early kexec reservation (explicit static start address). */
keir@16563 604 kexec_reserve_area(&boot_e820);
kaf24@6111 605
kfraser@15074 606 /*
keir@15077 607 * Iterate backwards over all superpage-aligned RAM regions.
kfraser@15074 608 *
kfraser@15074 609 * We require superpage alignment because the boot allocator is not yet
kfraser@15074 610 * initialised. Hence we can only map superpages in the address range
kfraser@15074 611 * 0 to BOOTSTRAP_DIRECTMAP_END, as this is guaranteed not to require
kfraser@15074 612 * dynamic allocation of pagetables.
kfraser@15074 613 *
kfraser@15074 614 * As well as mapping superpages in that range, in preparation for
kfraser@15074 615 * initialising the boot allocator, we also look for a region to which
kfraser@15074 616 * we can relocate the dom0 kernel and other multiboot modules. Also, on
kfraser@15074 617 * x86/64, we relocate Xen to higher memory.
kfraser@15074 618 */
kfraser@15074 619 modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
keir@19076 620
keir@19135 621 /* ensure mod[0] is mapped before parsing */
keir@19135 622 bootstrap_map(mod[0].mod_start, mod[0].mod_end);
keir@19135 623 modules_headroom = bzimage_headroom(
keir@19135 624 (char *)(unsigned long)mod[0].mod_start,
keir@19135 625 (unsigned long)(mod[0].mod_end - mod[0].mod_start));
keir@19135 626
keir@15077 627 for ( i = boot_e820.nr_map-1; i >= 0; i-- )
kfraser@15074 628 {
kfraser@15074 629 uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
kaf24@6134 630
keir@16563 631 /* Superpage-aligned chunks from 16MB to BOOTSTRAP_DIRECTMAP_END. */
kfraser@15074 632 s = (boot_e820.map[i].addr + mask) & ~mask;
kfraser@15074 633 e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
keir@16563 634 s = max_t(uint64_t, s, 16 << 20);
kfraser@15074 635 e = min_t(uint64_t, e, BOOTSTRAP_DIRECTMAP_END);
kfraser@15074 636 if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
kaf24@3354 637 continue;
kaf24@6111 638
kfraser@15074 639 /* Map the chunk. No memory will need to be allocated to do this. */
kfraser@15074 640 map_pages_to_xen(
kfraser@15074 641 (unsigned long)maddr_to_bootstrap_virt(s),
kfraser@15074 642 s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
kaf24@6111 643
kfraser@14084 644 #if defined(CONFIG_X86_64)
keir@19190 645 /* Relocate Xen image, allocation bitmap, and one page of padding. */
keir@19190 646 #define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
kfraser@15074 647 /* Is the region suitable for relocating Xen? */
keir@19055 648 if ( !xen_phys_start && ((e-s) >= reloc_size) )
kaf24@5003 649 {
kfraser@15074 650 extern l2_pgentry_t l2_xenmap[];
kfraser@15074 651 l4_pgentry_t *pl4e;
kfraser@15074 652 l3_pgentry_t *pl3e;
kfraser@15074 653 l2_pgentry_t *pl2e;
keir@16888 654 int i, j, k;
kfraser@15074 655
kfraser@15074 656 /* Select relocation address. */
keir@19055 657 e -= reloc_size;
kfraser@15074 658 xen_phys_start = e;
kfraser@15292 659 bootsym(trampoline_xen_phys_start) = e;
kfraser@15074 660
kfraser@15074 661 /*
kfraser@15074 662 * Perform relocation to new physical address.
kfraser@15074 663 * Before doing so we must sync static/global data with main memory
kfraser@15074 664 * with a barrier(). After this we must *not* modify static/global
kfraser@15074 665 * data until after we have switched to the relocated pagetables!
kfraser@15074 666 */
kfraser@15074 667 barrier();
kfraser@15074 668 move_memory(e, 0, __pa(&_end) - xen_phys_start);
kfraser@15074 669
kfraser@15379 670 /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
kfraser@15379 671 memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
kfraser@15379 672
kfraser@15074 673 /* Walk initial pagetables, relocating page directory entries. */
kfraser@15074 674 pl4e = __va(__pa(idle_pg_table));
kfraser@15074 675 for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
kfraser@15074 676 {
kfraser@15074 677 if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
kfraser@15074 678 continue;
kfraser@15074 679 *pl4e = l4e_from_intpte(l4e_get_intpte(*pl4e) +
kfraser@15074 680 xen_phys_start);
kfraser@15074 681 pl3e = l4e_to_l3e(*pl4e);
kfraser@15074 682 for ( j = 0; j < L3_PAGETABLE_ENTRIES; j++, pl3e++ )
kfraser@15074 683 {
keir@16921 684 /* Not present, 1GB mapping, or already relocated? */
kfraser@15074 685 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ||
keir@16921 686 (l3e_get_flags(*pl3e) & _PAGE_PSE) ||
kfraser@15074 687 (l3e_get_pfn(*pl3e) > 0x1000) )
kfraser@15074 688 continue;
kfraser@15074 689 *pl3e = l3e_from_intpte(l3e_get_intpte(*pl3e) +
kfraser@15074 690 xen_phys_start);
keir@16888 691 pl2e = l3e_to_l2e(*pl3e);
keir@16888 692 for ( k = 0; k < L2_PAGETABLE_ENTRIES; k++, pl2e++ )
keir@16888 693 {
keir@16888 694 /* Not present, PSE, or already relocated? */
keir@16888 695 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ||
keir@16888 696 (l2e_get_flags(*pl2e) & _PAGE_PSE) ||
keir@16888 697 (l2e_get_pfn(*pl2e) > 0x1000) )
keir@16888 698 continue;
keir@16888 699 *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
keir@16888 700 xen_phys_start);
keir@16888 701 }
kfraser@15074 702 }
kfraser@15074 703 }
kfraser@15074 704
kfraser@15074 705 /* The only data mappings to be relocated are in the Xen area. */
kfraser@15074 706 pl2e = __va(__pa(l2_xenmap));
keir@16888 707 *pl2e++ = l2e_from_pfn(xen_phys_start >> PAGE_SHIFT,
keir@16888 708 PAGE_HYPERVISOR | _PAGE_PSE);
keir@16888 709 for ( i = 1; i < L2_PAGETABLE_ENTRIES; i++, pl2e++ )
kfraser@15074 710 {
kfraser@15074 711 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
kfraser@15074 712 continue;
kfraser@15074 713 *pl2e = l2e_from_intpte(l2e_get_intpte(*pl2e) +
kfraser@15074 714 xen_phys_start);
kfraser@15074 715 }
kfraser@15074 716
kfraser@15074 717 /* Re-sync the stack and then switch to relocated pagetables. */
kfraser@15074 718 asm volatile (
kfraser@15074 719 "rep movsb ; " /* re-sync the stack */
kfraser@15074 720 "movq %%cr4,%%rsi ; "
kfraser@15074 721 "andb $0x7f,%%sil ; "
kfraser@15074 722 "movq %%rsi,%%cr4 ; " /* CR4.PGE == 0 */
kfraser@15074 723 "movq %0,%%cr3 ; " /* CR3 == new pagetables */
kfraser@15074 724 "orb $0x80,%%sil ; "
kfraser@15074 725 "movq %%rsi,%%cr4 " /* CR4.PGE == 1 */
kfraser@15074 726 : : "r" (__pa(idle_pg_table)), "S" (cpu0_stack),
kfraser@15074 727 "D" (__va(__pa(cpu0_stack))), "c" (STACK_SIZE) : "memory" );
kaf24@5003 728 }
kaf24@5003 729 #endif
keir@15077 730
keir@15077 731 /* Is the region suitable for relocating the multiboot modules? */
keir@19076 732 if ( !initial_images_start && (s < e) &&
keir@19076 733 ((e-s) >= (modules_length+modules_headroom)) )
keir@15077 734 {
kfraser@15489 735 initial_images_end = e;
kfraser@15489 736 e = (e - modules_length) & PAGE_MASK;
keir@15077 737 initial_images_start = e;
keir@19076 738 e -= modules_headroom;
keir@19076 739 initial_images_base = e;
keir@19076 740 move_memory(initial_images_start,
keir@15077 741 mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
keir@15077 742 }
kfraser@15489 743
kfraser@15489 744 if ( !kexec_crash_area.start && (s < e) &&
kfraser@15489 745 ((e-s) >= kexec_crash_area.size) )
kfraser@15489 746 {
kfraser@15489 747 e = (e - kexec_crash_area.size) & PAGE_MASK;
kfraser@15489 748 kexec_crash_area.start = e;
kfraser@15489 749 }
kaf24@3354 750 }
kaf24@3354 751
kfraser@15074 752 if ( !initial_images_start )
kfraser@15074 753 EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
keir@19076 754 reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
kfraser@15074 755
keir@19055 756 /* Initialise boot heap. */
keir@19055 757 allocator_bitmap_end = init_boot_allocator(__pa(&_end));
keir@19055 758 #if defined(CONFIG_X86_32)
keir@19061 759 xenheap_initial_phys_start = allocator_bitmap_end;
keir@19061 760 xenheap_phys_end = DIRECTMAP_MBYTES << 20;
keir@19055 761 #else
kfraser@15074 762 if ( !xen_phys_start )
kfraser@15074 763 EARLY_FAIL("Not enough memory to relocate Xen.\n");
keir@19055 764 reserve_e820_ram(&boot_e820, __pa(&_start), allocator_bitmap_end);
kfraser@15074 765 #endif
kfraser@15074 766
keir@16563 767 /* Late kexec reservation (dynamic start address). */
keir@16563 768 kexec_reserve_area(&boot_e820);
kfraser@15489 769
kfraser@15074 770 /*
keir@16624 771 * With the boot allocator now initialised, we can walk every RAM region
keir@16624 772 * and map it in its entirety (on x86/64, at least) and notify it to the
kfraser@15074 773 * boot allocator.
kfraser@15074 774 */
kfraser@15074 775 for ( i = 0; i < boot_e820.nr_map; i++ )
kfraser@15074 776 {
keir@16563 777 uint64_t s, e, map_s, map_e, mask = PAGE_SIZE - 1;
kfraser@15074 778
kfraser@15074 779 /* Only page alignment required now. */
kfraser@15074 780 s = (boot_e820.map[i].addr + mask) & ~mask;
kfraser@15074 781 e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
keir@16563 782 #if defined(CONFIG_X86_32)
keir@16563 783 s = max_t(uint64_t, s, xenheap_phys_end);
keir@16563 784 #else
keir@16563 785 s = max_t(uint64_t, s, 1<<20);
keir@16563 786 #endif
kfraser@15074 787 if ( (boot_e820.map[i].type != E820_RAM) || (s >= e) )
kfraser@15074 788 continue;
kfraser@15074 789
keir@16563 790 /* Need to create mappings above 16MB. */
keir@16563 791 map_s = max_t(uint64_t, s, 16<<20);
kfraser@15074 792 map_e = e;
keir@16563 793 #if defined(CONFIG_X86_32) /* mappings are truncated on x86_32 */
kfraser@15074 794 map_e = min_t(uint64_t, map_e, BOOTSTRAP_DIRECTMAP_END);
kfraser@15074 795 #endif
keir@16563 796
keir@16563 797 /* Pass mapped memory to allocator /before/ creating new mappings. */
keir@16624 798 init_boot_pages(s, min_t(uint64_t, map_s, e));
keir@16563 799
keir@16563 800 /* Create new mappings /before/ passing memory to the allocator. */
keir@16563 801 if ( map_s < map_e )
kfraser@15074 802 map_pages_to_xen(
keir@16563 803 (unsigned long)maddr_to_bootstrap_virt(map_s),
keir@16563 804 map_s >> PAGE_SHIFT, (map_e-map_s) >> PAGE_SHIFT,
keir@16563 805 PAGE_HYPERVISOR);
kfraser@15074 806
keir@16563 807 /* Pass remainder of this memory chunk to the allocator. */
keir@16624 808 init_boot_pages(map_s, e);
kfraser@15074 809 }
kfraser@15074 810
kaf24@5003 811 memguard_init();
kaf24@4950 812
kfraser@15074 813 nr_pages = 0;
kfraser@15074 814 for ( i = 0; i < e820.nr_map; i++ )
kfraser@15074 815 if ( e820.map[i].type == E820_RAM )
kfraser@15074 816 nr_pages += e820.map[i].size >> PAGE_SHIFT;
ian@12681 817 printk("System RAM: %luMB (%lukB)\n",
kaf24@3354 818 nr_pages >> (20 - PAGE_SHIFT),
kaf24@3354 819 nr_pages << (PAGE_SHIFT - 10));
kaf24@7220 820 total_pages = nr_pages;
kaf24@3354 821
kfraser@11296 822 /* Sanity check for unwanted bloat of certain hypercall structures. */
kfraser@11296 823 BUILD_BUG_ON(sizeof(((struct xen_platform_op *)0)->u) !=
kfraser@11296 824 sizeof(((struct xen_platform_op *)0)->u.pad));
kfraser@11296 825 BUILD_BUG_ON(sizeof(((struct xen_domctl *)0)->u) !=
kfraser@11296 826 sizeof(((struct xen_domctl *)0)->u.pad));
kfraser@11296 827 BUILD_BUG_ON(sizeof(((struct xen_sysctl *)0)->u) !=
kfraser@11296 828 sizeof(((struct xen_sysctl *)0)->u.pad));
kaf24@7388 829
kaf24@9878 830 BUILD_BUG_ON(sizeof(start_info_t) > PAGE_SIZE);
kaf24@9878 831 BUILD_BUG_ON(sizeof(shared_info_t) > PAGE_SIZE);
ack@13292 832 BUILD_BUG_ON(sizeof(struct vcpu_info) != 64);
kaf24@7744 833
ack@13291 834 #ifdef CONFIG_COMPAT
ack@13291 835 BUILD_BUG_ON(sizeof(((struct compat_platform_op *)0)->u) !=
ack@13291 836 sizeof(((struct compat_platform_op *)0)->u.pad));
ack@13291 837 BUILD_BUG_ON(sizeof(start_info_compat_t) > PAGE_SIZE);
ack@13292 838 BUILD_BUG_ON(sizeof(struct compat_vcpu_info) != 64);
ack@13291 839 #endif
ack@13291 840
kfraser@10492 841 /* Check definitions in public headers match internal defs. */
kaf24@9878 842 BUILD_BUG_ON(__HYPERVISOR_VIRT_START != HYPERVISOR_VIRT_START);
kaf24@8521 843 #ifdef HYPERVISOR_VIRT_END
kaf24@9878 844 BUILD_BUG_ON(__HYPERVISOR_VIRT_END != HYPERVISOR_VIRT_END);
kaf24@8521 845 #endif
kfraser@10492 846 BUILD_BUG_ON(MACH2PHYS_VIRT_START != RO_MPT_VIRT_START);
kfraser@10492 847 BUILD_BUG_ON(MACH2PHYS_VIRT_END != RO_MPT_VIRT_END);
kaf24@8521 848
kaf24@3354 849 init_frametable();
kaf24@3338 850
kfraser@11971 851 acpi_boot_table_init();
kfraser@11971 852
kfraser@11971 853 acpi_numa_init();
kfraser@11971 854
kfraser@11971 855 numa_initmem_init(0, max_page);
kfraser@11971 856
keir@19055 857 #if defined(CONFIG_X86_32)
keir@19055 858 /* Initialise the Xen heap. */
keir@19061 859 init_xenheap_pages(xenheap_initial_phys_start, xenheap_phys_end);
keir@19061 860 nr_pages = (xenheap_phys_end - xenheap_initial_phys_start) >> PAGE_SHIFT;
kaf24@6111 861 printk("Xen heap: %luMB (%lukB)\n",
kaf24@6111 862 nr_pages >> (20 - PAGE_SHIFT),
kaf24@6111 863 nr_pages << (PAGE_SHIFT - 10));
keir@19055 864 #endif
kaf24@3338 865
keir@14680 866 end_boot_allocator();
keir@19055 867 early_boot = 0;
keir@14680 868
keir@19055 869 #if defined(CONFIG_X86_64)
keir@19055 870 vesa_init();
keir@19055 871 #endif
kaf24@3338 872
keir@17444 873 softirq_init();
keir@17444 874
kaf24@8459 875 early_cpu_init();
kaf24@8459 876
kaf24@8459 877 paging_init();
kaf24@8459 878
keir@16274 879 tboot_probe();
keir@16274 880
kaf24@8459 881 /* Unmap the first page of CPU0's stack. */
kaf24@8459 882 memguard_guard_stack(cpu0_stack);
kaf24@8459 883
kaf24@8459 884 open_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ, new_tlbflush_clock_period);
kaf24@8459 885
kaf24@8459 886 if ( opt_watchdog )
kaf24@8459 887 nmi_watchdog = NMI_LOCAL_APIC;
kaf24@8459 888
kaf24@8459 889 sort_exception_tables();
kaf24@8459 890
kaf24@8459 891 find_smp_config();
kaf24@8459 892
kaf24@8459 893 dmi_scan_machine();
kaf24@8459 894
kaf24@8459 895 generic_apic_probe();
kaf24@8459 896
keir@17552 897 if ( x2apic_is_available() )
keir@17552 898 enable_x2apic();
keir@17552 899
kaf24@8459 900 acpi_boot_init();
kaf24@8459 901
kfraser@11971 902 init_cpu_to_node();
kfraser@11971 903
kfraser@11241 904 if ( smp_found_config )
kaf24@8459 905 get_smp_config();
kaf24@8459 906
keir@15083 907 #ifdef CONFIG_X86_64
keir@15083 908 /* Low mappings were only needed for some BIOS table parsing. */
keir@15083 909 zap_low_mappings();
keir@15083 910 #endif
keir@15083 911
kaf24@8459 912 init_apic_mappings();
kaf24@8459 913
kaf24@8459 914 init_IRQ();
kaf24@8459 915
kfraser@11241 916 percpu_init_areas();
kfraser@11241 917
kfraser@15815 918 xsm_init(&initrdidx, mbi, initial_images_start);
kfraser@15815 919
kfraser@11240 920 init_idle_domain();
kfraser@11240 921
kaf24@8459 922 trap_init();
kaf24@8459 923
kaf24@13662 924 rcu_init();
kaf24@13662 925
kaf24@8586 926 timer_init();
kaf24@8459 927
kaf24@8459 928 early_time_init();
kaf24@8459 929
kaf24@8459 930 arch_init_memory();
kaf24@8459 931
kaf24@8459 932 identify_cpu(&boot_cpu_data);
kaf24@8459 933 if ( cpu_has_fxsr )
kaf24@8459 934 set_in_cr4(X86_CR4_OSFXSR);
kaf24@8459 935 if ( cpu_has_xmm )
kaf24@8459 936 set_in_cr4(X86_CR4_OSXMMEXCPT);
keir@18920 937
keir@18920 938 local_irq_enable();
keir@18920 939
kfraser@15747 940 #ifdef CONFIG_X86_64
kfraser@15747 941 vesa_mtrr_init();
kfraser@15747 942 #endif
kaf24@8459 943
kaf24@8459 944 if ( opt_nosmp )
kaf24@8459 945 max_cpus = 0;
kaf24@8459 946
kaf24@8459 947 smp_prepare_cpus(max_cpus);
kaf24@8459 948
keir@18920 949 spin_debug_enable();
keir@18920 950
kaf24@8459 951 /*
kaf24@8459 952 * Initialise higher-level timer functions. We do this fairly late
kaf24@8459 953 * (post-SMP) because the time bases and scale factors need to be updated
kaf24@8459 954 * regularly, and SMP initialisation can cause a long delay with
kaf24@8459 955 * interrupts not yet enabled.
kaf24@8459 956 */
kaf24@8459 957 init_xen_time();
kaf24@8459 958
kaf24@8459 959 initialize_keytable();
kaf24@8459 960
kaf24@8459 961 serial_init_postirq();
kaf24@8459 962
kaf24@8459 963 for_each_present_cpu ( i )
kaf24@8459 964 {
kaf24@8459 965 if ( num_online_cpus() >= max_cpus )
kaf24@8459 966 break;
kaf24@8459 967 if ( !cpu_online(i) )
kaf24@13662 968 {
kaf24@13662 969 rcu_online_cpu(i);
kaf24@8459 970 __cpu_up(i);
kaf24@13662 971 }
kfraser@11971 972
kfraser@11998 973 /* Set up cpu_to_node[]. */
kfraser@11971 974 srat_detect_node(i);
kfraser@11998 975 /* Set up node_to_cpumask based on cpu_to_node[]. */
kfraser@11971 976 numa_add_cpu(i);
kaf24@8459 977 }
kaf24@8459 978
kaf24@8459 979 printk("Brought up %ld CPUs\n", (long)num_online_cpus());
kaf24@8459 980 smp_cpus_done(max_cpus);
kaf24@8459 981
kaf24@9117 982 initialise_gdb(); /* could be moved earlier */
kaf24@9117 983
kaf24@8459 984 do_initcalls();
kaf24@8459 985
kaf24@8594 986 if ( opt_watchdog )
kaf24@8594 987 watchdog_enable();
keir@19259 988
keir@19259 989 if ( !tboot_protect_mem_regions() )
keir@19259 990 panic("Could not protect TXT memory regions\n");
kaf24@8459 991
kaf24@8459 992 /* Create initial domain 0. */
keir@19266 993 dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
kfraser@10655 994 if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
kaf24@8459 995 panic("Error creating domain 0\n");
kaf24@8459 996
kfraser@12210 997 dom0->is_privileged = 1;
keir@16856 998 dom0->target = NULL;
kfraser@12210 999
kaf24@8459 1000 /* Grab the DOM0 command line. */
kaf24@8459 1001 cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
kfraser@15426 1002 if ( (cmdline != NULL) || (kextra != NULL) )
kaf24@8459 1003 {
kaf24@8459 1004 static char dom0_cmdline[MAX_GUEST_CMDLINE];
kaf24@8459 1005
kfraser@15796 1006 cmdline = cmdline_cook(cmdline);
kfraser@15796 1007 safe_strcpy(dom0_cmdline, cmdline);
kaf24@8459 1008
kfraser@15426 1009 if ( kextra != NULL )
kfraser@15426 1010 /* kextra always includes exactly one leading space. */
kfraser@15426 1011 safe_strcat(dom0_cmdline, kextra);
kfraser@15426 1012
kaf24@8459 1013 /* Append any extra parameters. */
kfraser@13691 1014 if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
kfraser@13691 1015 safe_strcat(dom0_cmdline, " noapic");
kaf24@8459 1016 if ( acpi_skip_timer_override &&
kfraser@13691 1017 !strstr(dom0_cmdline, "acpi_skip_timer_override") )
kfraser@13691 1018 safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
keir@16165 1019 if ( (strlen(acpi_param) == 0) && acpi_disabled )
keir@16165 1020 {
keir@16165 1021 printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
keir@16165 1022 safe_strcpy(acpi_param, "off");
keir@16165 1023 }
kfraser@13691 1024 if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
kaf24@8459 1025 {
kfraser@13691 1026 safe_strcat(dom0_cmdline, " acpi=");
kfraser@13691 1027 safe_strcat(dom0_cmdline, acpi_param);
kaf24@8459 1028 }
kfraser@13691 1029
kfraser@13691 1030 cmdline = dom0_cmdline;
kaf24@8459 1031 }
kaf24@8459 1032
kaf24@8459 1033 if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) )
kaf24@8459 1034 {
kaf24@8459 1035 _initrd_start = initial_images_start +
kaf24@8459 1036 (mod[initrdidx].mod_start - mod[0].mod_start);
kaf24@8459 1037 _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
kaf24@8459 1038 }
kaf24@8459 1039
keir@18180 1040 if ( xen_cpuidle )
keir@18180 1041 xen_processor_pmbits |= XEN_PROCESSOR_PM_CX;
keir@18180 1042
kaf24@8459 1043 /*
kaf24@8459 1044 * We're going to setup domain0 using the module(s) that we stashed safely
kaf24@8459 1045 * above our heap. The second module, if present, is an initrd ramdisk.
kaf24@8459 1046 */
kaf24@8459 1047 if ( construct_dom0(dom0,
keir@19076 1048 initial_images_base,
keir@19076 1049 initial_images_start,
kaf24@8459 1050 mod[0].mod_end-mod[0].mod_start,
kaf24@8459 1051 _initrd_start,
kaf24@8459 1052 _initrd_len,
kaf24@8459 1053 cmdline) != 0)
kaf24@8459 1054 panic("Could not set up DOM0 guest OS\n");
kaf24@8459 1055
kaf24@8459 1056 /* Scrub RAM that is still free and so may go to an unprivileged domain. */
kaf24@8459 1057 scrub_heap_pages();
kaf24@8459 1058
kaf24@8459 1059 init_trace_bufs();
kaf24@8459 1060
kaf24@10502 1061 console_endboot();
kaf24@8459 1062
kaf24@8459 1063 /* Hide UART from DOM0 if we're using it */
kaf24@8459 1064 serial_endboot();
kaf24@8459 1065
kaf24@8459 1066 domain_unpause_by_systemcontroller(dom0);
kaf24@8459 1067
keir@15082 1068 reset_stack_and_jump(init_done);
kaf24@8459 1069 }
kaf24@8459 1070
ian@13763 1071 void arch_get_xen_caps(xen_capabilities_info_t *info)
iap10@6721 1072 {
kfraser@14997 1073 /* Interface name is always xen-3.0-* for Xen-3.x. */
kfraser@14997 1074 int major = 3, minor = 0;
keir@13754 1075 char s[32];
keir@13754 1076
ian@13763 1077 (*info)[0] = '\0';
iap10@6721 1078
keir@17618 1079 #if defined(CONFIG_X86_32)
kaf24@6725 1080
keir@13754 1081 snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
ian@13763 1082 safe_strcat(*info, s);
kaf24@6725 1083 if ( hvm_enabled )
iap10@6721 1084 {
keir@13754 1085 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1086 safe_strcat(*info, s);
keir@13754 1087 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
ian@13763 1088 safe_strcat(*info, s);
iap10@6721 1089 }
iap10@6721 1090
kaf24@6725 1091 #elif defined(CONFIG_X86_64)
iap10@6721 1092
keir@13754 1093 snprintf(s, sizeof(s), "xen-%d.%d-x86_64 ", major, minor);
ian@13763 1094 safe_strcat(*info, s);
ack@13288 1095 #ifdef CONFIG_COMPAT
keir@13754 1096 snprintf(s, sizeof(s), "xen-%d.%d-x86_32p ", major, minor);
ian@13763 1097 safe_strcat(*info, s);
ack@13288 1098 #endif
kaf24@6725 1099 if ( hvm_enabled )
iap10@6721 1100 {
keir@13754 1101 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32 ", major, minor);
ian@13763 1102 safe_strcat(*info, s);
keir@13754 1103 snprintf(s, sizeof(s), "hvm-%d.%d-x86_32p ", major, minor);
ian@13763 1104 safe_strcat(*info, s);
keir@13754 1105 snprintf(s, sizeof(s), "hvm-%d.%d-x86_64 ", major, minor);
ian@13763 1106 safe_strcat(*info, s);
iap10@6721 1107 }
kaf24@6725 1108
iap10@6721 1109 #endif
iap10@6721 1110 }
iap10@6721 1111
keir@17738 1112 int xen_in_range(paddr_t start, paddr_t end)
keir@17729 1113 {
keir@19283 1114 int i;
keir@19283 1115 static struct {
keir@19283 1116 paddr_t s, e;
keir@19283 1117 } xen_regions[5];
keir@19055 1118
keir@19283 1119 /* initialize first time */
keir@19283 1120 if ( !xen_regions[0].s )
keir@19283 1121 {
keir@19283 1122 extern char __init_begin[], __per_cpu_start[], __per_cpu_end[],
keir@19283 1123 __bss_start[];
keir@19283 1124 extern unsigned long allocator_bitmap_end;
keir@19283 1125
keir@19283 1126 /* S3 resume code (and other real mode trampoline code) */
keir@19283 1127 xen_regions[0].s = bootsym_phys(trampoline_start);
keir@19283 1128 xen_regions[0].e = bootsym_phys(trampoline_end);
keir@19283 1129 /* hypervisor code + data */
keir@19283 1130 xen_regions[1].s =__pa(&_stext);
keir@19283 1131 xen_regions[1].e = __pa(&__init_begin);
keir@19283 1132 /* per-cpu data */
keir@19283 1133 xen_regions[2].s = __pa(&__per_cpu_start);
keir@19283 1134 xen_regions[2].e = __pa(&__per_cpu_end);
keir@19283 1135 /* bss + boot allocator bitmap */
keir@19283 1136 xen_regions[3].s = __pa(&__bss_start);
keir@19283 1137 xen_regions[3].e = allocator_bitmap_end;
keir@19283 1138 /* frametable */
keir@19283 1139 xen_regions[4].s = (unsigned long)frame_table;
keir@19283 1140 xen_regions[4].e = (unsigned long)frame_table +
keir@19283 1141 PFN_UP(max_page * sizeof(*frame_table));
keir@19283 1142 }
keir@19283 1143
keir@19283 1144 for ( i = 0; i < ARRAY_SIZE(xen_regions); i++ )
keir@19283 1145 {
keir@19283 1146 if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
keir@19283 1147 return 1;
keir@19283 1148 }
keir@19283 1149
keir@19283 1150 return 0;
keir@17729 1151 }
keir@17729 1152
kaf24@3914 1153 /*
kaf24@3914 1154 * Local variables:
kaf24@3914 1155 * mode: C
kaf24@3914 1156 * c-set-style: "BSD"
kaf24@3914 1157 * c-basic-offset: 4
kaf24@3914 1158 * tab-width: 4
kaf24@3914 1159 * indent-tabs-mode: nil
kaf24@3988 1160 * End:
kaf24@3914 1161 */