ia64/linux-2.6.18-xen.hg

view arch/i386/kernel/setup-xen.c @ 482:f619448beab5

xen/i386: re-add and use pre_setup_arch_hook()

It was only during the 2.6.25 merge that I realized that there was a
difference to native code that was not only unnecessary, but even
preventing the Xen version from being better readable and closer to
native both in terms of source code and behavior:
pre_setup_arch_hook() can do everything that (or equivalent to what)
x86-64 does in head64-xen.c. Apart from that it simplifies forward
porting, since certain pieces set up here are required to be available
much earlier in newer Linux.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Mar 18 11:22:54 2008 +0000 (2008-03-18)
parents 17a1a39df565
children f128849f9e78
line source
1 /*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
22 /*
23 * This file handles the architecture-dependent parts of initialization
24 */
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/mmzone.h>
29 #include <linux/screen_info.h>
30 #include <linux/ioport.h>
31 #include <linux/acpi.h>
32 #include <linux/apm_bios.h>
33 #include <linux/initrd.h>
34 #include <linux/bootmem.h>
35 #include <linux/seq_file.h>
36 #include <linux/platform_device.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kernel.h>
47 #include <linux/percpu.h>
48 #include <linux/notifier.h>
49 #include <linux/kexec.h>
50 #include <linux/crash_dump.h>
51 #include <linux/dmi.h>
52 #include <linux/pfn.h>
54 #include <video/edid.h>
56 #include <asm/apic.h>
57 #include <asm/e820.h>
58 #include <asm/mpspec.h>
59 #include <asm/setup.h>
60 #include <asm/arch_hooks.h>
61 #include <asm/sections.h>
62 #include <asm/io_apic.h>
63 #include <asm/ist.h>
64 #include <asm/io.h>
65 #include <asm/hypervisor.h>
66 #include <xen/interface/physdev.h>
67 #include <xen/interface/memory.h>
68 #include <xen/features.h>
69 #include <xen/firmware.h>
70 #include <xen/xencons.h>
71 #include <setup_arch.h>
72 #include <bios_ebda.h>
74 #ifdef CONFIG_XEN
75 #include <xen/interface/kexec.h>
76 #endif
78 /* Forward Declaration. */
79 void __init find_max_pfn(void);
81 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
82 static struct notifier_block xen_panic_block = {
83 xen_panic_event, NULL, 0 /* try to go last */
84 };
86 extern char hypercall_page[PAGE_SIZE];
87 EXPORT_SYMBOL(hypercall_page);
89 int disable_pse __devinitdata = 0;
91 /*
92 * Machine setup..
93 */
95 #ifdef CONFIG_EFI
96 int efi_enabled = 0;
97 EXPORT_SYMBOL(efi_enabled);
98 #endif
100 /* cpu data as detected by the assembly code in head.S */
101 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
102 /* common cpu data for all cpus */
103 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
104 EXPORT_SYMBOL(boot_cpu_data);
106 unsigned long mmu_cr4_features;
108 #ifdef CONFIG_ACPI
109 int acpi_disabled = 0;
110 #else
111 int acpi_disabled = 1;
112 #endif
113 EXPORT_SYMBOL(acpi_disabled);
115 #ifdef CONFIG_ACPI
116 int __initdata acpi_force = 0;
117 extern acpi_interrupt_flags acpi_sci_flags;
118 #endif
120 /* for MCA, but anyone else can use it if they want */
121 unsigned int machine_id;
122 #ifdef CONFIG_MCA
123 EXPORT_SYMBOL(machine_id);
124 #endif
125 unsigned int machine_submodel_id;
126 unsigned int BIOS_revision;
127 unsigned int mca_pentium_flag;
129 /* For PCI or other memory-mapped resources */
130 unsigned long pci_mem_start = 0x10000000;
131 #ifdef CONFIG_PCI
132 EXPORT_SYMBOL(pci_mem_start);
133 #endif
135 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
136 int bootloader_type;
138 /* user-defined highmem size */
139 static unsigned int highmem_pages = -1;
141 /*
142 * Setup options
143 */
144 struct drive_info_struct { char dummy[32]; } drive_info;
145 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
146 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
147 EXPORT_SYMBOL(drive_info);
148 #endif
149 struct screen_info screen_info;
150 EXPORT_SYMBOL(screen_info);
151 struct apm_info apm_info;
152 EXPORT_SYMBOL(apm_info);
153 struct sys_desc_table_struct {
154 unsigned short length;
155 unsigned char table[0];
156 };
157 struct edid_info edid_info;
158 EXPORT_SYMBOL_GPL(edid_info);
159 #ifndef CONFIG_XEN
160 #define copy_edid() (edid_info = EDID_INFO)
161 #endif
162 struct ist_info ist_info;
163 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
164 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
165 EXPORT_SYMBOL(ist_info);
166 #endif
167 struct e820map e820;
168 #ifdef CONFIG_XEN
169 struct e820map machine_e820;
170 #endif
172 extern void early_cpu_init(void);
173 extern void generic_apic_probe(char *);
174 extern int root_mountflags;
176 unsigned long saved_videomode;
178 #define RAMDISK_IMAGE_START_MASK 0x07FF
179 #define RAMDISK_PROMPT_FLAG 0x8000
180 #define RAMDISK_LOAD_FLAG 0x4000
182 static char command_line[COMMAND_LINE_SIZE];
184 unsigned char __initdata boot_params[PARAM_SIZE];
186 static struct resource data_resource = {
187 .name = "Kernel data",
188 .start = 0,
189 .end = 0,
190 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
191 };
193 static struct resource code_resource = {
194 .name = "Kernel code",
195 .start = 0,
196 .end = 0,
197 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
198 };
200 static struct resource system_rom_resource = {
201 .name = "System ROM",
202 .start = 0xf0000,
203 .end = 0xfffff,
204 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
205 };
207 static struct resource extension_rom_resource = {
208 .name = "Extension ROM",
209 .start = 0xe0000,
210 .end = 0xeffff,
211 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212 };
214 static struct resource adapter_rom_resources[] = { {
215 .name = "Adapter ROM",
216 .start = 0xc8000,
217 .end = 0,
218 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
219 }, {
220 .name = "Adapter ROM",
221 .start = 0,
222 .end = 0,
223 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
224 }, {
225 .name = "Adapter ROM",
226 .start = 0,
227 .end = 0,
228 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
229 }, {
230 .name = "Adapter ROM",
231 .start = 0,
232 .end = 0,
233 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
234 }, {
235 .name = "Adapter ROM",
236 .start = 0,
237 .end = 0,
238 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
239 }, {
240 .name = "Adapter ROM",
241 .start = 0,
242 .end = 0,
243 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
244 } };
246 #define ADAPTER_ROM_RESOURCES \
247 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
249 static struct resource video_rom_resource = {
250 .name = "Video ROM",
251 .start = 0xc0000,
252 .end = 0xc7fff,
253 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
254 };
256 static struct resource video_ram_resource = {
257 .name = "Video RAM area",
258 .start = 0xa0000,
259 .end = 0xbffff,
260 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
261 };
263 static struct resource standard_io_resources[] = { {
264 .name = "dma1",
265 .start = 0x0000,
266 .end = 0x001f,
267 .flags = IORESOURCE_BUSY | IORESOURCE_IO
268 }, {
269 .name = "pic1",
270 .start = 0x0020,
271 .end = 0x0021,
272 .flags = IORESOURCE_BUSY | IORESOURCE_IO
273 }, {
274 .name = "timer0",
275 .start = 0x0040,
276 .end = 0x0043,
277 .flags = IORESOURCE_BUSY | IORESOURCE_IO
278 }, {
279 .name = "timer1",
280 .start = 0x0050,
281 .end = 0x0053,
282 .flags = IORESOURCE_BUSY | IORESOURCE_IO
283 }, {
284 .name = "keyboard",
285 .start = 0x0060,
286 .end = 0x006f,
287 .flags = IORESOURCE_BUSY | IORESOURCE_IO
288 }, {
289 .name = "dma page reg",
290 .start = 0x0080,
291 .end = 0x008f,
292 .flags = IORESOURCE_BUSY | IORESOURCE_IO
293 }, {
294 .name = "pic2",
295 .start = 0x00a0,
296 .end = 0x00a1,
297 .flags = IORESOURCE_BUSY | IORESOURCE_IO
298 }, {
299 .name = "dma2",
300 .start = 0x00c0,
301 .end = 0x00df,
302 .flags = IORESOURCE_BUSY | IORESOURCE_IO
303 }, {
304 .name = "fpu",
305 .start = 0x00f0,
306 .end = 0x00ff,
307 .flags = IORESOURCE_BUSY | IORESOURCE_IO
308 } };
310 #define STANDARD_IO_RESOURCES \
311 (sizeof standard_io_resources / sizeof standard_io_resources[0])
313 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
315 static int __init romchecksum(unsigned char *rom, unsigned long length)
316 {
317 unsigned char *p, sum = 0;
319 for (p = rom; p < rom + length; p++)
320 sum += *p;
321 return sum == 0;
322 }
/*
 * Scan the legacy ISA ROM windows (video BIOS at 0xc0000, extension ROM
 * at 0xe0000, system BIOS at 0xf0000, and adapter ROMs on 2K boundaries)
 * and claim whatever is found in the iomem resource tree.  A ROM is
 * recognized by the 0xaa55 signature and, where a length byte exists,
 * validated with romchecksum().
 */
static void __init probe_roms(void)
{
	unsigned long start, length, upper;
	unsigned char *rom;
	int i;

#ifdef CONFIG_XEN
	/* Nothing to do if not running in dom0. */
	if (!is_initial_xendomain())
		return;
#endif

	/* video rom */
	upper = adapter_rom_resources[0].start;
	for (start = video_rom_resource.start; start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		video_rom_resource.start = start;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* if checksum okay, trust length byte */
		if (length && romchecksum(rom, length))
			video_rom_resource.end = start + length - 1;

		request_resource(&iomem_resource, &video_rom_resource);
		break;
	}

	/* continue the adapter-ROM scan just past the video ROM, 2K aligned */
	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
	if (start < upper)
		start = upper;

	/* system rom */
	request_resource(&iomem_resource, &system_rom_resource);
	/* adapter ROMs may extend no further than the system BIOS */
	upper = system_rom_resource.start;

	/* check for extension rom (ignore length byte!) */
	rom = isa_bus_to_virt(extension_rom_resource.start);
	if (romsignature(rom)) {
		length = extension_rom_resource.end - extension_rom_resource.start + 1;
		if (romchecksum(rom, length)) {
			request_resource(&iomem_resource, &extension_rom_resource);
			/* ...and if present, adapter ROMs stop below it */
			upper = extension_rom_resource.start;
		}
	}

	/* check for adapter roms on 2k boundaries */
	for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* but accept any length that fits if checksum okay */
		if (!length || start + length > upper || !romchecksum(rom, length))
			continue;

		adapter_rom_resources[i].start = start;
		adapter_rom_resources[i].end = start + length - 1;
		request_resource(&iomem_resource, &adapter_rom_resources[i]);

		/* realign to a 2K boundary past the ROM just claimed */
		start = adapter_rom_resources[i++].end & ~2047UL;
	}
}
395 /*
396 * Point at the empty zero page to start with. We map the real shared_info
397 * page as soon as fixmap is up and running.
398 */
399 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
400 EXPORT_SYMBOL(HYPERVISOR_shared_info);
402 unsigned long *phys_to_machine_mapping;
403 unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
404 EXPORT_SYMBOL(phys_to_machine_mapping);
406 /* Raw start-of-day parameters from the hypervisor. */
407 start_info_t *xen_start_info;
408 EXPORT_SYMBOL(xen_start_info);
410 void __init add_memory_region(unsigned long long start,
411 unsigned long long size, int type)
412 {
413 int x;
415 if (!efi_enabled) {
416 x = e820.nr_map;
418 if (x == E820MAX) {
419 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
420 return;
421 }
423 e820.map[x].addr = start;
424 e820.map[x].size = size;
425 e820.map[x].type = type;
426 e820.nr_map++;
427 }
428 } /* add_memory_region */
/*
 * Truncate the memory map (EFI map when EFI is enabled, e820 otherwise)
 * so that usable RAM ends at 'size' bytes.  Used by "mem=" handling.
 */
static void __init limit_regions(unsigned long long size)
{
	unsigned long long current_addr = 0;
	int i;

	if (efi_enabled) {
		efi_memory_desc_t *md;
		void *p;

		for (p = memmap.map, i = 0; p < memmap.map_end;
		     p += memmap.desc_size, i++) {
			md = p;
			/* end address of this descriptor (pages are 4K) */
			current_addr = md->phys_addr + (md->num_pages << 12);
			if (md->type == EFI_CONVENTIONAL_MEMORY) {
				if (current_addr >= size) {
					/* shrink this descriptor to end at 'size'
					 * and cut the map off after it */
					md->num_pages -=
						(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
					memmap.nr_map = i + 1;
					return;
				}
			}
		}
	}
	for (i = 0; i < e820.nr_map; i++) {
		current_addr = e820.map[i].addr + e820.map[i].size;
		if (current_addr < size)
			continue;

		/* only RAM regions are trimmed; others pass through */
		if (e820.map[i].type != E820_RAM)
			continue;

		if (e820.map[i].addr >= size) {
			/*
			 * This region starts past the end of the
			 * requested size, skip it completely.
			 */
			e820.nr_map = i;
		} else {
			/* keep this region but shorten it to end at 'size' */
			e820.nr_map = i + 1;
			e820.map[i].size -= current_addr - size;
		}
		return;
	}
#ifdef CONFIG_XEN
	/* reached only when the loop above ran to completion, so 'i' and
	 * 'current_addr' hold the values from the final iteration */
	if (i==e820.nr_map && current_addr < size) {
		/*
		 * The e820 map finished before our requested size so
		 * extend the final entry to the requested address.
		 */
		--i;
		if (e820.map[i].type == E820_RAM)
			/* current_addr < size, so this grows the entry */
			e820.map[i].size -= current_addr - size;
		else
			add_memory_region(current_addr, size - current_addr, E820_RAM);
	}
#endif
}
488 #define E820_DEBUG 1
490 static void __init print_memory_map(char *who)
491 {
492 int i;
494 for (i = 0; i < e820.nr_map; i++) {
495 printk(" %s: %016Lx - %016Lx ", who,
496 e820.map[i].addr,
497 e820.map[i].addr + e820.map[i].size);
498 switch (e820.map[i].type) {
499 case E820_RAM: printk("(usable)\n");
500 break;
501 case E820_RESERVED:
502 printk("(reserved)\n");
503 break;
504 case E820_ACPI:
505 printk("(ACPI data)\n");
506 break;
507 case E820_NVS:
508 printk("(ACPI NVS)\n");
509 break;
510 default: printk("type %lu\n", e820.map[i].type);
511 break;
512 }
513 }
514 }
516 /*
517 * Sanitize the BIOS e820 map.
518 *
519 * Some e820 responses include overlapping entries. The following
520 * replaces the original e820 map with a new one, removing overlaps.
521 *
522 */
523 struct change_member {
524 struct e820entry *pbios; /* pointer to original bios entry */
525 unsigned long long addr; /* address for this change point */
526 };
527 static struct change_member change_point_list[2*E820MAX] __initdata;
528 static struct change_member *change_point[2*E820MAX] __initdata;
529 static struct e820entry *overlap_list[E820MAX] __initdata;
530 static struct e820entry new_bios[E820MAX] __initdata;
/*
 * Rebuild the BIOS-provided e820 map without overlaps: every region
 * boundary becomes a "change point"; the points are sorted by address
 * and swept in order, tracking which original entries currently overlap
 * and emitting a new entry whenever the effective type changes.
 * Returns 0 on success, -1 if the map is too small or contains an
 * address-wrapping entry.  Updates *pnr_map to the new entry count.
 */
int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
		Visually we're performing the following (1,2,3,4 = memory types)...

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;	/* true number of change-points */

	/* sort change-point list by memory addresses (low -> high);
	   simple bubble sort — the list is at most 2*E820MAX entries */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i=1; i < chg_nr; i++) {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap
			   (puts region starts before region ends at equal
			   addresses, so zero-length gaps are not emitted) */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	/* number of entries in the overlap table */
	new_bios_entry=0;	/* index for creating new bios map entries */
	last_type = 0;		/* start with undefined memory type */
	last_addr = 0;		/* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break;	/* no more space left for new bios entries */
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;	/* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
685 /*
686 * Copy the BIOS e820 map into a safe place.
687 *
688 * Sanity-check it while we're at it..
689 *
690 * If we're lucky and live on a modern system, the setup code
691 * will have given us a memory map that we can use to properly
692 * set up memory. If we aren't, we'll fake a memory map.
693 *
694 * We check to see that the memory map contains at least 2 elements
695 * before we'll use it, because the detection code in setup.S may
696 * not be perfect and most every PC known to man has two memory
697 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
698 * thinkpad 560x, for example, does not cooperate with the memory
699 * detection code.)
700 */
/*
 * Walk the nr_map entries at biosmap and feed them into the kernel's
 * e820 map via add_memory_region(), sanity-checking each one.
 * Returns 0 on success, -1 when the map is rejected.
 */
int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
#ifndef CONFIG_XEN
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;
#else
	/* under Xen the map is hypervisor-supplied; even one entry is valid */
	BUG_ON(nr_map < 1);
#endif

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

#ifndef CONFIG_XEN
		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM) {
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				/* note: 'continue' in this do-while jumps to the
				 * (biosmap++,--nr_map) condition below, advancing
				 * to the next entry */
				if (end <= 0x100000ULL)
					continue;
				start = 0x100000ULL;
				size = end - start;
			}
		}
#endif
		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);
	return 0;
}
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
/* BIOS Enhanced Disk Drive information, copied out of boot_params */
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
#ifndef CONFIG_XEN
/**
 * copy_edd() - Copy the BIOS EDD information
 * from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
}
#endif
#else
/* no-op stub: EDD disabled, or running on Xen where there is no BIOS
 * EDD data to copy */
static inline void copy_edd(void)
{
}
#endif
767 static void __init parse_cmdline_early (char ** cmdline_p)
768 {
769 char c = ' ', *to = command_line, *from = saved_command_line;
770 int len = 0, max_cmdline;
771 int userdef = 0;
773 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
774 max_cmdline = COMMAND_LINE_SIZE;
775 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
776 /* Save unparsed command line copy for /proc/cmdline */
777 saved_command_line[max_cmdline-1] = '\0';
779 for (;;) {
780 if (c != ' ')
781 goto next_char;
782 /*
783 * "mem=nopentium" disables the 4MB page tables.
784 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
785 * to <mem>, overriding the bios size.
786 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
787 * <start> to <start>+<mem>, overriding the bios size.
788 *
789 * HPA tells me bootloaders need to parse mem=, so no new
790 * option should be mem= [also see Documentation/i386/boot.txt]
791 */
792 if (!memcmp(from, "mem=", 4)) {
793 if (to != command_line)
794 to--;
795 if (!memcmp(from+4, "nopentium", 9)) {
796 from += 9+4;
797 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
798 disable_pse = 1;
799 } else {
800 /* If the user specifies memory size, we
801 * limit the BIOS-provided memory map to
802 * that size. exactmap can be used to specify
803 * the exact map. mem=number can be used to
804 * trim the existing memory map.
805 */
806 unsigned long long mem_size;
808 mem_size = memparse(from+4, &from);
809 limit_regions(mem_size);
810 userdef=1;
811 }
812 }
814 else if (!memcmp(from, "memmap=", 7)) {
815 if (to != command_line)
816 to--;
817 if (!memcmp(from+7, "exactmap", 8)) {
818 #ifdef CONFIG_CRASH_DUMP
819 /* If we are doing a crash dump, we
820 * still need to know the real mem
821 * size before original memory map is
822 * reset.
823 */
824 find_max_pfn();
825 saved_max_pfn = max_pfn;
826 #endif
827 from += 8+7;
828 e820.nr_map = 0;
829 userdef = 1;
830 } else {
831 /* If the user specifies memory size, we
832 * limit the BIOS-provided memory map to
833 * that size. exactmap can be used to specify
834 * the exact map. mem=number can be used to
835 * trim the existing memory map.
836 */
837 unsigned long long start_at, mem_size;
839 mem_size = memparse(from+7, &from);
840 if (*from == '@') {
841 start_at = memparse(from+1, &from);
842 add_memory_region(start_at, mem_size, E820_RAM);
843 } else if (*from == '#') {
844 start_at = memparse(from+1, &from);
845 add_memory_region(start_at, mem_size, E820_ACPI);
846 } else if (*from == '$') {
847 start_at = memparse(from+1, &from);
848 add_memory_region(start_at, mem_size, E820_RESERVED);
849 } else {
850 limit_regions(mem_size);
851 userdef=1;
852 }
853 }
854 }
856 else if (!memcmp(from, "noexec=", 7))
857 noexec_setup(from + 7);
860 #ifdef CONFIG_X86_MPPARSE
861 /*
862 * If the BIOS enumerates physical processors before logical,
863 * maxcpus=N at enumeration-time can be used to disable HT.
864 */
865 else if (!memcmp(from, "maxcpus=", 8)) {
866 extern unsigned int maxcpus;
868 maxcpus = simple_strtoul(from + 8, NULL, 0);
869 }
870 #endif
872 #ifdef CONFIG_ACPI
873 /* "acpi=off" disables both ACPI table parsing and interpreter */
874 else if (!memcmp(from, "acpi=off", 8)) {
875 disable_acpi();
876 }
878 /* acpi=force to over-ride black-list */
879 else if (!memcmp(from, "acpi=force", 10)) {
880 acpi_force = 1;
881 acpi_ht = 1;
882 acpi_disabled = 0;
883 }
885 /* acpi=strict disables out-of-spec workarounds */
886 else if (!memcmp(from, "acpi=strict", 11)) {
887 acpi_strict = 1;
888 }
890 /* Limit ACPI just to boot-time to enable HT */
891 else if (!memcmp(from, "acpi=ht", 7)) {
892 if (!acpi_force)
893 disable_acpi();
894 acpi_ht = 1;
895 }
897 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
898 else if (!memcmp(from, "pci=noacpi", 10)) {
899 acpi_disable_pci();
900 }
901 /* "acpi=noirq" disables ACPI interrupt routing */
902 else if (!memcmp(from, "acpi=noirq", 10)) {
903 acpi_noirq_set();
904 }
906 else if (!memcmp(from, "acpi_sci=edge", 13))
907 acpi_sci_flags.trigger = 1;
909 else if (!memcmp(from, "acpi_sci=level", 14))
910 acpi_sci_flags.trigger = 3;
912 else if (!memcmp(from, "acpi_sci=high", 13))
913 acpi_sci_flags.polarity = 1;
915 else if (!memcmp(from, "acpi_sci=low", 12))
916 acpi_sci_flags.polarity = 3;
918 #ifdef CONFIG_X86_IO_APIC
919 else if (!memcmp(from, "acpi_skip_timer_override", 24))
920 acpi_skip_timer_override = 1;
922 if (!memcmp(from, "disable_timer_pin_1", 19))
923 disable_timer_pin_1 = 1;
924 if (!memcmp(from, "enable_timer_pin_1", 18))
925 disable_timer_pin_1 = -1;
927 /* disable IO-APIC */
928 else if (!memcmp(from, "noapic", 6))
929 disable_ioapic_setup();
930 #endif /* CONFIG_X86_IO_APIC */
931 #endif /* CONFIG_ACPI */
933 #ifdef CONFIG_X86_LOCAL_APIC
934 /* enable local APIC */
935 else if (!memcmp(from, "lapic", 5))
936 lapic_enable();
938 /* disable local APIC */
939 else if (!memcmp(from, "nolapic", 6))
940 lapic_disable();
941 #endif /* CONFIG_X86_LOCAL_APIC */
943 #ifdef CONFIG_KEXEC
944 /* crashkernel=size@addr specifies the location to reserve for
945 * a crash kernel. By reserving this memory we guarantee
946 * that linux never set's it up as a DMA target.
947 * Useful for holding code to do something appropriate
948 * after a kernel panic.
949 */
950 else if (!memcmp(from, "crashkernel=", 12)) {
951 #ifndef CONFIG_XEN
952 unsigned long size, base;
953 size = memparse(from+12, &from);
954 if (*from == '@') {
955 base = memparse(from+1, &from);
956 /* FIXME: Do I want a sanity check
957 * to validate the memory range?
958 */
959 crashk_res.start = base;
960 crashk_res.end = base + size - 1;
961 }
962 #else
963 printk("Ignoring crashkernel command line, "
964 "parameter will be supplied by xen\n");
965 #endif
966 }
967 #endif
968 #ifdef CONFIG_PROC_VMCORE
969 /* elfcorehdr= specifies the location of elf core header
970 * stored by the crashed kernel.
971 */
972 else if (!memcmp(from, "elfcorehdr=", 11))
973 elfcorehdr_addr = memparse(from+11, &from);
974 #endif
976 /*
977 * highmem=size forces highmem to be exactly 'size' bytes.
978 * This works even on boxes that have no highmem otherwise.
979 * This also works to reduce highmem size on bigger boxes.
980 */
981 else if (!memcmp(from, "highmem=", 8))
982 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
984 /*
985 * vmalloc=size forces the vmalloc area to be exactly 'size'
986 * bytes. This can be used to increase (or decrease) the
987 * vmalloc area - the default is 128m.
988 */
989 else if (!memcmp(from, "vmalloc=", 8))
990 __VMALLOC_RESERVE = memparse(from+8, &from);
992 next_char:
993 c = *(from++);
994 if (!c)
995 break;
996 if (COMMAND_LINE_SIZE <= ++len)
997 break;
998 *(to++) = c;
999 }
1000 *to = '\0';
1001 *cmdline_p = command_line;
1002 if (userdef) {
1003 printk(KERN_INFO "user-defined physical RAM map:\n");
1004 print_memory_map("user");
1008 /*
1009 * Callback for efi_memory_walk.
1010 */
1011 static int __init
1012 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
1014 unsigned long *max_pfn = arg, pfn;
1016 if (start < end) {
1017 pfn = PFN_UP(end -1);
1018 if (pfn > *max_pfn)
1019 *max_pfn = pfn;
1021 return 0;
1024 static int __init
1025 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1027 memory_present(0, start, end);
1028 return 0;
1031 /*
1032 * This function checks if any part of the range <start,end> is mapped
1033 * with type.
1034 */
1035 int
1036 e820_any_mapped(u64 start, u64 end, unsigned type)
1038 int i;
1040 #ifndef CONFIG_XEN
1041 for (i = 0; i < e820.nr_map; i++) {
1042 const struct e820entry *ei = &e820.map[i];
1043 #else
1044 if (!is_initial_xendomain())
1045 return 0;
1046 for (i = 0; i < machine_e820.nr_map; ++i) {
1047 const struct e820entry *ei = &machine_e820.map[i];
1048 #endif
1050 if (type && ei->type != type)
1051 continue;
1052 if (ei->addr >= end || ei->addr + ei->size <= start)
1053 continue;
1054 return 1;
1056 return 0;
1058 EXPORT_SYMBOL_GPL(e820_any_mapped);
1060 /*
1061 * This function checks if the entire range <start,end> is mapped with type.
1063 * Note: this function only works correct if the e820 table is sorted and
1064 * not-overlapping, which is the case
1065 */
1066 int __init
1067 e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
1069 u64 start = s;
1070 u64 end = e;
1071 int i;
1073 #ifndef CONFIG_XEN
1074 for (i = 0; i < e820.nr_map; i++) {
1075 struct e820entry *ei = &e820.map[i];
1076 #else
1077 if (!is_initial_xendomain())
1078 return 0;
1079 for (i = 0; i < machine_e820.nr_map; ++i) {
1080 const struct e820entry *ei = &machine_e820.map[i];
1081 #endif
1082 if (type && ei->type != type)
1083 continue;
1084 /* is the region (part) in overlap with the current region ?*/
1085 if (ei->addr >= end || ei->addr + ei->size <= start)
1086 continue;
1087 /* if the region is at the beginning of <start,end> we move
1088 * start to the end of the region since it's ok until there
1089 */
1090 if (ei->addr <= start)
1091 start = ei->addr + ei->size;
1092 /* if start is now at or beyond end, we're done, full
1093 * coverage */
1094 if (start >= end)
1095 return 1; /* we're done */
1097 return 0;
1100 /*
1101 * Find the highest page frame number we have available
1102 */
1103 void __init find_max_pfn(void)
1105 int i;
1107 max_pfn = 0;
1108 if (efi_enabled) {
1109 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1110 efi_memmap_walk(efi_memory_present_wrapper, NULL);
1111 return;
1114 for (i = 0; i < e820.nr_map; i++) {
1115 unsigned long start, end;
1116 /* RAM? */
1117 if (e820.map[i].type != E820_RAM)
1118 continue;
1119 start = PFN_UP(e820.map[i].addr);
1120 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1121 if (start >= end)
1122 continue;
1123 if (end > max_pfn)
1124 max_pfn = end;
1125 memory_present(0, start, end);
1129 /*
1130 * Determine low and high memory ranges:
1131 */
1132 unsigned long __init find_max_low_pfn(void)
1134 unsigned long max_low_pfn;
1136 max_low_pfn = max_pfn;
1137 if (max_low_pfn > MAXMEM_PFN) {
1138 if (highmem_pages == -1)
1139 highmem_pages = max_pfn - MAXMEM_PFN;
1140 if (highmem_pages + MAXMEM_PFN < max_pfn)
1141 max_pfn = MAXMEM_PFN + highmem_pages;
1142 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1143 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1144 highmem_pages = 0;
1146 max_low_pfn = MAXMEM_PFN;
1147 #ifndef CONFIG_HIGHMEM
1148 /* Maximum memory usable is what is directly addressable */
1149 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1150 MAXMEM>>20);
1151 if (max_pfn > MAX_NONPAE_PFN)
1152 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1153 else
1154 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1155 max_pfn = MAXMEM_PFN;
1156 #else /* !CONFIG_HIGHMEM */
1157 #ifndef CONFIG_X86_PAE
1158 if (max_pfn > MAX_NONPAE_PFN) {
1159 max_pfn = MAX_NONPAE_PFN;
1160 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1161 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1163 #endif /* !CONFIG_X86_PAE */
1164 #endif /* !CONFIG_HIGHMEM */
1165 } else {
1166 if (highmem_pages == -1)
1167 highmem_pages = 0;
1168 #ifdef CONFIG_HIGHMEM
1169 if (highmem_pages >= max_pfn) {
1170 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1171 highmem_pages = 0;
1173 if (highmem_pages) {
1174 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1175 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1176 highmem_pages = 0;
1178 max_low_pfn -= highmem_pages;
1180 #else
1181 if (highmem_pages)
1182 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1183 #endif
1185 return max_low_pfn;
1188 /*
1189 * Free all available memory for boot time allocation. Used
1190 * as a callback function by efi_memory_walk()
1191 */
1193 static int __init
1194 free_available_memory(unsigned long start, unsigned long end, void *arg)
1196 /* check max_low_pfn */
1197 if (start >= (max_low_pfn << PAGE_SHIFT))
1198 return 0;
1199 if (end >= (max_low_pfn << PAGE_SHIFT))
1200 end = max_low_pfn << PAGE_SHIFT;
1201 if (start < end)
1202 free_bootmem(start, end - start);
1204 return 0;
1206 /*
1207 * Register fully available low RAM pages with the bootmem allocator.
1208 */
1209 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1211 int i;
1213 if (efi_enabled) {
1214 efi_memmap_walk(free_available_memory, NULL);
1215 return;
1217 for (i = 0; i < e820.nr_map; i++) {
1218 unsigned long curr_pfn, last_pfn, size;
1219 /*
1220 * Reserve usable low memory
1221 */
1222 if (e820.map[i].type != E820_RAM)
1223 continue;
1224 /*
1225 * We are rounding up the start address of usable memory:
1226 */
1227 curr_pfn = PFN_UP(e820.map[i].addr);
1228 if (curr_pfn >= max_low_pfn)
1229 continue;
1230 /*
1231 * ... and at the end of the usable range downwards:
1232 */
1233 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1235 #ifdef CONFIG_XEN
1236 /*
1237 * Truncate to the number of actual pages currently
1238 * present.
1239 */
1240 if (last_pfn > xen_start_info->nr_pages)
1241 last_pfn = xen_start_info->nr_pages;
1242 #endif
1244 if (last_pfn > max_low_pfn)
1245 last_pfn = max_low_pfn;
1247 /*
1248 * .. finally, did all the rounding and playing
1249 * around just make the area go away?
1250 */
1251 if (last_pfn <= curr_pfn)
1252 continue;
1254 size = last_pfn - curr_pfn;
1255 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1259 #ifndef CONFIG_XEN
1260 /*
1261 * workaround for Dell systems that neglect to reserve EBDA
1262 */
1263 static void __init reserve_ebda_region(void)
1265 unsigned int addr;
1266 addr = get_bios_ebda();
1267 if (addr)
1268 reserve_bootmem(addr, PAGE_SIZE);
1270 #endif
1272 #ifndef CONFIG_NEED_MULTIPLE_NODES
1273 void __init setup_bootmem_allocator(void);
1274 static unsigned long __init setup_memory(void)
1276 /*
1277 * partially used pages are not usable - thus
1278 * we are rounding upwards:
1279 */
1280 min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
1281 xen_start_info->nr_pt_frames;
1283 find_max_pfn();
1285 max_low_pfn = find_max_low_pfn();
1287 #ifdef CONFIG_HIGHMEM
1288 highstart_pfn = highend_pfn = max_pfn;
1289 if (max_pfn > max_low_pfn) {
1290 highstart_pfn = max_low_pfn;
1292 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1293 pages_to_mb(highend_pfn - highstart_pfn));
1294 #endif
1295 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1296 pages_to_mb(max_low_pfn));
1298 setup_bootmem_allocator();
1300 return max_low_pfn;
1303 void __init zone_sizes_init(void)
1305 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1306 unsigned int max_dma, low;
1308 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1309 low = max_low_pfn;
1311 if (low < max_dma)
1312 zones_size[ZONE_DMA] = low;
1313 else {
1314 zones_size[ZONE_DMA] = max_dma;
1315 zones_size[ZONE_NORMAL] = low - max_dma;
1316 #ifdef CONFIG_HIGHMEM
1317 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1318 #endif
1320 free_area_init(zones_size);
1322 #else
1323 extern unsigned long __init setup_memory(void);
1324 extern void zone_sizes_init(void);
1325 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
1327 void __init setup_bootmem_allocator(void)
1329 unsigned long bootmap_size;
1330 /*
1331 * Initialize the boot-time allocator (with low memory only):
1332 */
1333 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1335 register_bootmem_low_pages(max_low_pfn);
1337 /*
1338 * Reserve the bootmem bitmap itself as well. We do this in two
1339 * steps (first step was init_bootmem()) because this catches
1340 * the (very unlikely) case of us accidentally initializing the
1341 * bootmem allocator with an invalid RAM area.
1342 */
1343 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1344 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1346 #ifndef CONFIG_XEN
1347 /*
1348 * reserve physical page 0 - it's a special BIOS page on many boxes,
1349 * enabling clean reboots, SMP operation, laptop functions.
1350 */
1351 reserve_bootmem(0, PAGE_SIZE);
1353 /* reserve EBDA region, it's a 4K region */
1354 reserve_ebda_region();
1356 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1357 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1358 unless you have no PS/2 mouse plugged in. */
1359 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1360 boot_cpu_data.x86 == 6)
1361 reserve_bootmem(0xa0000 - 4096, 4096);
1363 #ifdef CONFIG_SMP
1364 /*
1365 * But first pinch a few for the stack/trampoline stuff
1366 * FIXME: Don't need the extra page at 4K, but need to fix
1367 * trampoline before removing it. (see the GDT stuff)
1368 */
1369 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1370 #endif
1371 #ifdef CONFIG_ACPI_SLEEP
1372 /*
1373 * Reserve low memory region for sleep support.
1374 */
1375 acpi_reserve_bootmem();
1376 #endif
1377 #endif /* !CONFIG_XEN */
1379 #ifdef CONFIG_BLK_DEV_INITRD
1380 if (xen_start_info->mod_start) {
1381 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1382 /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1383 initrd_start = INITRD_START + PAGE_OFFSET;
1384 initrd_end = initrd_start+INITRD_SIZE;
1385 initrd_below_start_ok = 1;
1387 else {
1388 printk(KERN_ERR "initrd extends beyond end of memory "
1389 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1390 INITRD_START + INITRD_SIZE,
1391 max_low_pfn << PAGE_SHIFT);
1392 initrd_start = 0;
1395 #endif
1396 #ifdef CONFIG_KEXEC
1397 #ifdef CONFIG_XEN
1398 xen_machine_kexec_setup_resources();
1399 #else
1400 if (crashk_res.start != crashk_res.end)
1401 reserve_bootmem(crashk_res.start,
1402 crashk_res.end - crashk_res.start + 1);
1403 #endif
1404 #endif
1407 /*
1408 * The node 0 pgdat is initialized before all of these because
1409 * it's needed for bootmem. node>0 pgdats have their virtual
1410 * space allocated before the pagetables are in place to access
1411 * them, so they can't be cleared then.
1413 * This should all compile down to nothing when NUMA is off.
1414 */
1415 void __init remapped_pgdat_init(void)
1417 int nid;
1419 for_each_online_node(nid) {
1420 if (nid != 0)
1421 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1425 /*
1426 * Request address space for all standard RAM and ROM resources
1427 * and also for regions reported as reserved by the e820.
1428 */
1429 static void __init
1430 legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
1431 struct resource *code_resource,
1432 struct resource *data_resource)
1434 int i;
1436 probe_roms();
1438 for (i = 0; i < nr_map; i++) {
1439 struct resource *res;
1440 #ifndef CONFIG_RESOURCES_64BIT
1441 if (e820[i].addr + e820[i].size > 0x100000000ULL)
1442 continue;
1443 #endif
1444 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
1445 switch (e820[i].type) {
1446 case E820_RAM: res->name = "System RAM"; break;
1447 case E820_ACPI: res->name = "ACPI Tables"; break;
1448 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1449 default: res->name = "reserved";
1451 res->start = e820[i].addr;
1452 res->end = res->start + e820[i].size - 1;
1453 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1454 if (request_resource(&iomem_resource, res)) {
1455 kfree(res);
1456 continue;
1458 if (e820[i].type == E820_RAM) {
1459 /*
1460 * We don't know which RAM region contains kernel data,
1461 * so we try it repeatedly and let the resource manager
1462 * test it.
1463 */
1464 #ifndef CONFIG_XEN
1465 request_resource(res, code_resource);
1466 request_resource(res, data_resource);
1467 #endif
1468 #ifdef CONFIG_KEXEC
1469 if (crashk_res.start != crashk_res.end)
1470 request_resource(res, &crashk_res);
1471 #ifdef CONFIG_XEN
1472 xen_machine_kexec_register_resources(res);
1473 #endif
1474 #endif
1479 /*
1480 * Locate a unused range of the physical address space below 4G which
1481 * can be used for PCI mappings.
1482 */
1483 static void __init
1484 e820_setup_gap(struct e820entry *e820, int nr_map)
1486 unsigned long gapstart, gapsize, round;
1487 unsigned long long last;
1488 int i;
1490 /*
1491 * Search for the bigest gap in the low 32 bits of the e820
1492 * memory space.
1493 */
1494 last = 0x100000000ull;
1495 gapstart = 0x10000000;
1496 gapsize = 0x400000;
1497 i = nr_map;
1498 while (--i >= 0) {
1499 unsigned long long start = e820[i].addr;
1500 unsigned long long end = start + e820[i].size;
1502 /*
1503 * Since "last" is at most 4GB, we know we'll
1504 * fit in 32 bits if this condition is true
1505 */
1506 if (last > end) {
1507 unsigned long gap = last - end;
1509 if (gap > gapsize) {
1510 gapsize = gap;
1511 gapstart = end;
1514 if (start < last)
1515 last = start;
1518 /*
1519 * See how much we want to round up: start off with
1520 * rounding to the next 1MB area.
1521 */
1522 round = 0x100000;
1523 while ((gapsize >> 4) > round)
1524 round += round;
1525 /* Fun with two's complement */
1526 pci_mem_start = (gapstart + round) & -round;
1528 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1529 pci_mem_start, gapstart, gapsize);
1532 /*
1533 * Request address space for all standard resources
1535 * This is called just before pcibios_init(), which is also a
1536 * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
1537 */
1538 static int __init request_standard_resources(void)
1540 int i;
1542 /* Nothing to do if not running in dom0. */
1543 if (!is_initial_xendomain())
1544 return 0;
1546 printk("Setting up standard PCI resources\n");
1547 #ifdef CONFIG_XEN
1548 legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
1549 &code_resource, &data_resource);
1550 #else
1551 if (efi_enabled)
1552 efi_initialize_iomem_resources(&code_resource, &data_resource);
1553 else
1554 legacy_init_iomem_resources(e820.map, e820.nr_map,
1555 &code_resource, &data_resource);
1556 #endif
1558 /* EFI systems may still have VGA */
1559 request_resource(&iomem_resource, &video_ram_resource);
1561 /* request I/O space for devices used on all i[345]86 PCs */
1562 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1563 request_resource(&ioport_resource, &standard_io_resources[i]);
1564 return 0;
1567 subsys_initcall(request_standard_resources);
1569 static void __init register_memory(void)
1571 #ifdef CONFIG_XEN
1572 if (is_initial_xendomain()) {
1573 struct xen_memory_map memmap;
1575 memmap.nr_entries = E820MAX;
1576 set_xen_guest_handle(memmap.buffer, machine_e820.map);
1578 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
1579 BUG();
1581 machine_e820.nr_map = memmap.nr_entries;
1582 e820_setup_gap(machine_e820.map, machine_e820.nr_map);
1584 else
1585 #endif
1586 e820_setup_gap(e820.map, e820.nr_map);
#ifdef CONFIG_MCA
/* Record whether the machine has an MCA bus; no-op when MCA is compiled out. */
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
static void set_mca_bus(int x) { }
#endif
1598 /*
1599 * Determine if we were loaded by an EFI loader. If so, then we have also been
1600 * passed the efi memmap, systab, etc., so we should use these data structures
1601 * for initialization. Note, the efi init code path is determined by the
1602 * global efi_enabled. This allows the same kernel image to be used on existing
1603 * systems (with a traditional BIOS) as well as on EFI systems.
1604 */
1605 void __init setup_arch(char **cmdline_p)
1607 int i, j, k, fpp;
1608 struct physdev_set_iopl set_iopl;
1609 unsigned long max_low_pfn;
1610 unsigned long p2m_pages;
1612 /* Force a quick death if the kernel panics (not domain 0). */
1613 extern int panic_timeout;
1614 if (!panic_timeout && !is_initial_xendomain())
1615 panic_timeout = 1;
1617 /* Register a call for panic conditions. */
1618 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
1620 WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
1621 VMASST_TYPE_4gb_segments));
1622 WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
1623 VMASST_TYPE_writable_pagetables));
1625 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1626 pre_setup_arch_hook();
1627 early_cpu_init();
1628 #ifdef CONFIG_SMP
1629 prefill_possible_map();
1630 #endif
1632 /*
1633 * FIXME: This isn't an official loader_type right
1634 * now but does currently work with elilo.
1635 * If we were configured as an EFI kernel, check to make
1636 * sure that we were loaded correctly from elilo and that
1637 * the system table is valid. If not, then initialize normally.
1638 */
1639 #ifdef CONFIG_EFI
1640 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1641 efi_enabled = 1;
1642 #endif
1644 /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1645 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1646 */
1647 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
1648 drive_info = DRIVE_INFO;
1649 screen_info = SCREEN_INFO;
1650 copy_edid();
1651 apm_info.bios = APM_BIOS_INFO;
1652 ist_info = IST_INFO;
1653 saved_videomode = VIDEO_MODE;
1654 if( SYS_DESC_TABLE.length != 0 ) {
1655 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1656 machine_id = SYS_DESC_TABLE.table[0];
1657 machine_submodel_id = SYS_DESC_TABLE.table[1];
1658 BIOS_revision = SYS_DESC_TABLE.table[2];
1660 bootloader_type = LOADER_TYPE;
1662 if (is_initial_xendomain()) {
1663 const struct dom0_vga_console_info *info =
1664 (void *)((char *)xen_start_info +
1665 xen_start_info->console.dom0.info_off);
1667 dom0_init_screen_info(info,
1668 xen_start_info->console.dom0.info_size);
1669 xen_start_info->console.domU.mfn = 0;
1670 xen_start_info->console.domU.evtchn = 0;
1671 } else
1672 screen_info.orig_video_isVGA = 0;
1674 #ifdef CONFIG_BLK_DEV_RAM
1675 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1676 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1677 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1678 #endif
1680 ARCH_SETUP
1681 if (efi_enabled)
1682 efi_init();
1683 else {
1684 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1685 print_memory_map(machine_specific_memory_setup());
1688 copy_edd();
1690 if (!MOUNT_ROOT_RDONLY)
1691 root_mountflags &= ~MS_RDONLY;
1692 init_mm.start_code = (unsigned long) _text;
1693 init_mm.end_code = (unsigned long) _etext;
1694 init_mm.end_data = (unsigned long) _edata;
1695 init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
1696 xen_start_info->nr_pt_frames) << PAGE_SHIFT;
1698 code_resource.start = virt_to_phys(_text);
1699 code_resource.end = virt_to_phys(_etext)-1;
1700 data_resource.start = virt_to_phys(_etext);
1701 data_resource.end = virt_to_phys(_edata)-1;
1703 parse_cmdline_early(cmdline_p);
1705 #ifdef CONFIG_EARLY_PRINTK
1707 char *s = strstr(*cmdline_p, "earlyprintk=");
1708 if (s) {
1709 setup_early_printk(strchr(s, '=') + 1);
1710 printk("early console enabled\n");
1713 #endif
1715 max_low_pfn = setup_memory();
1717 /*
1718 * NOTE: before this point _nobody_ is allowed to allocate
1719 * any memory using the bootmem allocator. Although the
1720 * alloctor is now initialised only the first 8Mb of the kernel
1721 * virtual address space has been mapped. All allocations before
1722 * paging_init() has completed must use the alloc_bootmem_low_pages()
1723 * variant (which allocates DMA'able memory) and care must be taken
1724 * not to exceed the 8Mb limit.
1725 */
1727 #ifdef CONFIG_SMP
1728 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1729 #endif
1730 paging_init();
1731 remapped_pgdat_init();
1732 sparse_init();
1733 zone_sizes_init();
1735 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1736 /*
1737 * Find and reserve possible boot-time SMP configuration:
1738 */
1739 find_smp_config();
1740 #endif
1742 p2m_pages = max_pfn;
1743 if (xen_start_info->nr_pages > max_pfn) {
1744 /*
1745 * the max_pfn was shrunk (probably by mem= or highmem=
1746 * kernel parameter); shrink reservation with the HV
1747 */
1748 struct xen_memory_reservation reservation = {
1749 .address_bits = 0,
1750 .extent_order = 0,
1751 .domid = DOMID_SELF
1752 };
1753 unsigned int difference;
1754 int ret;
1756 difference = xen_start_info->nr_pages - max_pfn;
1758 set_xen_guest_handle(reservation.extent_start,
1759 ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
1760 reservation.nr_extents = difference;
1761 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
1762 &reservation);
1763 BUG_ON (ret != difference);
1765 else if (max_pfn > xen_start_info->nr_pages)
1766 p2m_pages = xen_start_info->nr_pages;
1768 /* Make sure we have a correctly sized P->M table. */
1769 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1770 phys_to_machine_mapping = alloc_bootmem_low_pages(
1771 max_pfn * sizeof(unsigned long));
1772 memset(phys_to_machine_mapping, ~0,
1773 max_pfn * sizeof(unsigned long));
1774 memcpy(phys_to_machine_mapping,
1775 (unsigned long *)xen_start_info->mfn_list,
1776 p2m_pages * sizeof(unsigned long));
1777 free_bootmem(
1778 __pa(xen_start_info->mfn_list),
1779 PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
1780 sizeof(unsigned long))));
1782 /*
1783 * Initialise the list of the frames that specify the list of
1784 * frames that make up the p2m table. Used by save/restore
1785 */
1786 pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
1788 fpp = PAGE_SIZE/sizeof(unsigned long);
1789 for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
1790 if ((j % fpp) == 0) {
1791 k++;
1792 BUG_ON(k>=16);
1793 pfn_to_mfn_frame_list[k] =
1794 alloc_bootmem_low_pages(PAGE_SIZE);
1795 pfn_to_mfn_frame_list_list[k] =
1796 virt_to_mfn(pfn_to_mfn_frame_list[k]);
1797 j=0;
1799 pfn_to_mfn_frame_list[k][j] =
1800 virt_to_mfn(&phys_to_machine_mapping[i]);
1802 HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
1803 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
1804 virt_to_mfn(pfn_to_mfn_frame_list_list);
1807 /* Mark all ISA DMA channels in-use - using them wouldn't work. */
1808 for (i = 0; i < MAX_DMA_CHANNELS; ++i)
1809 if (i != 4 && request_dma(i, "xen") != 0)
1810 BUG();
1812 /*
1813 * NOTE: at this point the bootmem allocator is fully available.
1814 */
1816 if (is_initial_xendomain())
1817 dmi_scan_machine();
1819 #ifdef CONFIG_X86_GENERICARCH
1820 generic_apic_probe(*cmdline_p);
1821 #endif
1822 if (efi_enabled)
1823 efi_map_memmap();
1825 set_iopl.iopl = 1;
1826 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
1828 #ifdef CONFIG_ACPI
1829 if (!is_initial_xendomain()) {
1830 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1831 acpi_disabled = 1;
1832 acpi_ht = 0;
1835 /*
1836 * Parse the ACPI tables for possible boot-time SMP configuration.
1837 */
1838 acpi_boot_table_init();
1839 #endif
1841 #ifdef CONFIG_X86_IO_APIC
1842 check_acpi_pci(); /* Checks more than just ACPI actually */
1843 #endif
1845 #ifdef CONFIG_ACPI
1846 acpi_boot_init();
1848 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1849 if (def_to_bigsmp)
1850 printk(KERN_WARNING "More than 8 CPUs detected and "
1851 "CONFIG_X86_PC cannot handle it.\nUse "
1852 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1853 #endif
1854 #endif
1855 #ifdef CONFIG_X86_LOCAL_APIC
1856 if (smp_found_config)
1857 get_smp_config();
1858 #endif
1860 register_memory();
1862 if (is_initial_xendomain()) {
1863 #ifdef CONFIG_VT
1864 #if defined(CONFIG_VGA_CONSOLE)
1865 if (!efi_enabled ||
1866 (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1867 conswitchp = &vga_con;
1868 #elif defined(CONFIG_DUMMY_CONSOLE)
1869 conswitchp = &dummy_con;
1870 #endif
1871 #endif
1872 } else {
1873 #if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
1874 conswitchp = &dummy_con;
1875 #endif
1877 tsc_init();
1880 static int
1881 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1883 HYPERVISOR_shutdown(SHUTDOWN_crash);
1884 /* we're never actually going to get here... */
1885 return NOTIFY_DONE;
1888 static __init int add_pcspkr(void)
1890 struct platform_device *pd;
1891 int ret;
1893 if (!is_initial_xendomain())
1894 return 0;
1896 pd = platform_device_alloc("pcspkr", -1);
1897 if (!pd)
1898 return -ENOMEM;
1900 ret = platform_device_add(pd);
1901 if (ret)
1902 platform_device_put(pd);
1904 return ret;
1906 device_initcall(add_pcspkr);
1908 /*
1909 * Local Variables:
1910 * mode:c
1911 * c-file-style:"k&r"
1912 * c-basic-offset:8
1913 * End:
1914 */