ia64/linux-2.6.18-xen.hg

view arch/i386/kernel/setup-xen.c @ 526:f128849f9e78

linux/x86: set up machine_e820 in copy_e820_map() for it to be usable
earlier (and also for being placed more logically).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Apr 22 14:41:51 2008 +0100 (2008-04-22)
parents f619448beab5
children 6d9e4b02bba4
line source
1 /*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
22 /*
23 * This file handles the architecture-dependent parts of initialization
24 */
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/mmzone.h>
29 #include <linux/screen_info.h>
30 #include <linux/ioport.h>
31 #include <linux/acpi.h>
32 #include <linux/apm_bios.h>
33 #include <linux/initrd.h>
34 #include <linux/bootmem.h>
35 #include <linux/seq_file.h>
36 #include <linux/platform_device.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kernel.h>
47 #include <linux/percpu.h>
48 #include <linux/notifier.h>
49 #include <linux/kexec.h>
50 #include <linux/crash_dump.h>
51 #include <linux/dmi.h>
52 #include <linux/pfn.h>
54 #include <video/edid.h>
56 #include <asm/apic.h>
57 #include <asm/e820.h>
58 #include <asm/mpspec.h>
59 #include <asm/setup.h>
60 #include <asm/arch_hooks.h>
61 #include <asm/sections.h>
62 #include <asm/io_apic.h>
63 #include <asm/ist.h>
64 #include <asm/io.h>
65 #include <asm/hypervisor.h>
66 #include <xen/interface/physdev.h>
67 #include <xen/interface/memory.h>
68 #include <xen/features.h>
69 #include <xen/firmware.h>
70 #include <xen/xencons.h>
71 #include <setup_arch.h>
72 #include <bios_ebda.h>
74 #ifdef CONFIG_XEN
75 #include <xen/interface/kexec.h>
76 #endif
78 /* Forward Declaration. */
79 void __init find_max_pfn(void);
81 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
82 static struct notifier_block xen_panic_block = {
83 xen_panic_event, NULL, 0 /* try to go last */
84 };
86 extern char hypercall_page[PAGE_SIZE];
87 EXPORT_SYMBOL(hypercall_page);
89 int disable_pse __devinitdata = 0;
91 /*
92 * Machine setup..
93 */
95 #ifdef CONFIG_EFI
96 int efi_enabled = 0;
97 EXPORT_SYMBOL(efi_enabled);
98 #endif
100 /* cpu data as detected by the assembly code in head.S */
101 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
102 /* common cpu data for all cpus */
103 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
104 EXPORT_SYMBOL(boot_cpu_data);
106 unsigned long mmu_cr4_features;
108 #ifdef CONFIG_ACPI
109 int acpi_disabled = 0;
110 #else
111 int acpi_disabled = 1;
112 #endif
113 EXPORT_SYMBOL(acpi_disabled);
115 #ifdef CONFIG_ACPI
116 int __initdata acpi_force = 0;
117 extern acpi_interrupt_flags acpi_sci_flags;
118 #endif
120 /* for MCA, but anyone else can use it if they want */
121 unsigned int machine_id;
122 #ifdef CONFIG_MCA
123 EXPORT_SYMBOL(machine_id);
124 #endif
125 unsigned int machine_submodel_id;
126 unsigned int BIOS_revision;
127 unsigned int mca_pentium_flag;
129 /* For PCI or other memory-mapped resources */
130 unsigned long pci_mem_start = 0x10000000;
131 #ifdef CONFIG_PCI
132 EXPORT_SYMBOL(pci_mem_start);
133 #endif
135 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
136 int bootloader_type;
138 /* user-defined highmem size */
139 static unsigned int highmem_pages = -1;
141 /*
142 * Setup options
143 */
144 struct drive_info_struct { char dummy[32]; } drive_info;
145 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
146 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
147 EXPORT_SYMBOL(drive_info);
148 #endif
149 struct screen_info screen_info;
150 EXPORT_SYMBOL(screen_info);
151 struct apm_info apm_info;
152 EXPORT_SYMBOL(apm_info);
153 struct sys_desc_table_struct {
154 unsigned short length;
155 unsigned char table[0];
156 };
157 struct edid_info edid_info;
158 EXPORT_SYMBOL_GPL(edid_info);
159 #ifndef CONFIG_XEN
160 #define copy_edid() (edid_info = EDID_INFO)
161 #endif
162 struct ist_info ist_info;
163 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
164 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
165 EXPORT_SYMBOL(ist_info);
166 #endif
/*
 * Memory map used by this kernel: entries are appended by
 * add_memory_region() and trimmed by limit_regions().
 */
167 struct e820map e820;
168 #ifdef CONFIG_XEN
/*
 * Xen only: filled in by copy_e820_map() -- from the hypervisor's
 * XENMEM_machine_memory_map reply in the initial domain, otherwise as a
 * plain copy of e820.
 */
169 struct e820map machine_e820;
170 #endif
172 extern void early_cpu_init(void);
173 extern void generic_apic_probe(char *);
174 extern int root_mountflags;
176 unsigned long saved_videomode;
178 #define RAMDISK_IMAGE_START_MASK 0x07FF
179 #define RAMDISK_PROMPT_FLAG 0x8000
180 #define RAMDISK_LOAD_FLAG 0x4000
182 static char command_line[COMMAND_LINE_SIZE];
184 unsigned char __initdata boot_params[PARAM_SIZE];
186 static struct resource data_resource = {
187 .name = "Kernel data",
188 .start = 0,
189 .end = 0,
190 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
191 };
193 static struct resource code_resource = {
194 .name = "Kernel code",
195 .start = 0,
196 .end = 0,
197 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
198 };
200 static struct resource system_rom_resource = {
201 .name = "System ROM",
202 .start = 0xf0000,
203 .end = 0xfffff,
204 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
205 };
207 static struct resource extension_rom_resource = {
208 .name = "Extension ROM",
209 .start = 0xe0000,
210 .end = 0xeffff,
211 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
212 };
214 static struct resource adapter_rom_resources[] = { {
215 .name = "Adapter ROM",
216 .start = 0xc8000,
217 .end = 0,
218 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
219 }, {
220 .name = "Adapter ROM",
221 .start = 0,
222 .end = 0,
223 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
224 }, {
225 .name = "Adapter ROM",
226 .start = 0,
227 .end = 0,
228 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
229 }, {
230 .name = "Adapter ROM",
231 .start = 0,
232 .end = 0,
233 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
234 }, {
235 .name = "Adapter ROM",
236 .start = 0,
237 .end = 0,
238 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
239 }, {
240 .name = "Adapter ROM",
241 .start = 0,
242 .end = 0,
243 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
244 } };
246 #define ADAPTER_ROM_RESOURCES \
247 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
249 static struct resource video_rom_resource = {
250 .name = "Video ROM",
251 .start = 0xc0000,
252 .end = 0xc7fff,
253 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
254 };
256 static struct resource video_ram_resource = {
257 .name = "Video RAM area",
258 .start = 0xa0000,
259 .end = 0xbffff,
260 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
261 };
263 static struct resource standard_io_resources[] = { {
264 .name = "dma1",
265 .start = 0x0000,
266 .end = 0x001f,
267 .flags = IORESOURCE_BUSY | IORESOURCE_IO
268 }, {
269 .name = "pic1",
270 .start = 0x0020,
271 .end = 0x0021,
272 .flags = IORESOURCE_BUSY | IORESOURCE_IO
273 }, {
274 .name = "timer0",
275 .start = 0x0040,
276 .end = 0x0043,
277 .flags = IORESOURCE_BUSY | IORESOURCE_IO
278 }, {
279 .name = "timer1",
280 .start = 0x0050,
281 .end = 0x0053,
282 .flags = IORESOURCE_BUSY | IORESOURCE_IO
283 }, {
284 .name = "keyboard",
285 .start = 0x0060,
286 .end = 0x006f,
287 .flags = IORESOURCE_BUSY | IORESOURCE_IO
288 }, {
289 .name = "dma page reg",
290 .start = 0x0080,
291 .end = 0x008f,
292 .flags = IORESOURCE_BUSY | IORESOURCE_IO
293 }, {
294 .name = "pic2",
295 .start = 0x00a0,
296 .end = 0x00a1,
297 .flags = IORESOURCE_BUSY | IORESOURCE_IO
298 }, {
299 .name = "dma2",
300 .start = 0x00c0,
301 .end = 0x00df,
302 .flags = IORESOURCE_BUSY | IORESOURCE_IO
303 }, {
304 .name = "fpu",
305 .start = 0x00f0,
306 .end = 0x00ff,
307 .flags = IORESOURCE_BUSY | IORESOURCE_IO
308 } };
310 #define STANDARD_IO_RESOURCES \
311 (sizeof standard_io_resources / sizeof standard_io_resources[0])
313 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
/*
 * Check an option-ROM image: the 8-bit sum of the @length bytes starting at
 * @rom must be zero.  Returns 1 when it is, 0 otherwise (an empty range
 * therefore counts as valid).
 */
static int __init romchecksum(unsigned char *rom, unsigned long length)
{
	unsigned char total = 0;
	unsigned long off;

	for (off = 0; off < length; off++)
		total += rom[off];

	return !total;
}
324 static void __init probe_roms(void)
325 {
326 unsigned long start, length, upper;
327 unsigned char *rom;
328 int i;
330 #ifdef CONFIG_XEN
331 /* Nothing to do if not running in dom0. */
332 if (!is_initial_xendomain())
333 return;
334 #endif
336 /* video rom */
337 upper = adapter_rom_resources[0].start;
338 for (start = video_rom_resource.start; start < upper; start += 2048) {
339 rom = isa_bus_to_virt(start);
340 if (!romsignature(rom))
341 continue;
343 video_rom_resource.start = start;
345 /* 0 < length <= 0x7f * 512, historically */
346 length = rom[2] * 512;
348 /* if checksum okay, trust length byte */
349 if (length && romchecksum(rom, length))
350 video_rom_resource.end = start + length - 1;
352 request_resource(&iomem_resource, &video_rom_resource);
353 break;
354 }
356 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
357 if (start < upper)
358 start = upper;
360 /* system rom */
361 request_resource(&iomem_resource, &system_rom_resource);
362 upper = system_rom_resource.start;
364 /* check for extension rom (ignore length byte!) */
365 rom = isa_bus_to_virt(extension_rom_resource.start);
366 if (romsignature(rom)) {
367 length = extension_rom_resource.end - extension_rom_resource.start + 1;
368 if (romchecksum(rom, length)) {
369 request_resource(&iomem_resource, &extension_rom_resource);
370 upper = extension_rom_resource.start;
371 }
372 }
374 /* check for adapter roms on 2k boundaries */
375 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
376 rom = isa_bus_to_virt(start);
377 if (!romsignature(rom))
378 continue;
380 /* 0 < length <= 0x7f * 512, historically */
381 length = rom[2] * 512;
383 /* but accept any length that fits if checksum okay */
384 if (!length || start + length > upper || !romchecksum(rom, length))
385 continue;
387 adapter_rom_resources[i].start = start;
388 adapter_rom_resources[i].end = start + length - 1;
389 request_resource(&iomem_resource, &adapter_rom_resources[i]);
391 start = adapter_rom_resources[i++].end & ~2047UL;
392 }
393 }
395 /*
396 * Point at the empty zero page to start with. We map the real shared_info
397 * page as soon as fixmap is up and running.
398 */
399 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
400 EXPORT_SYMBOL(HYPERVISOR_shared_info);
402 unsigned long *phys_to_machine_mapping;
403 unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
404 EXPORT_SYMBOL(phys_to_machine_mapping);
406 /* Raw start-of-day parameters from the hypervisor. */
407 start_info_t *xen_start_info;
408 EXPORT_SYMBOL(xen_start_info);
410 void __init add_memory_region(unsigned long long start,
411 unsigned long long size, int type)
412 {
413 int x;
415 if (!efi_enabled) {
416 x = e820.nr_map;
418 if (x == E820MAX) {
419 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
420 return;
421 }
423 e820.map[x].addr = start;
424 e820.map[x].size = size;
425 e820.map[x].type = type;
426 e820.nr_map++;
427 }
428 } /* add_memory_region */
430 static void __init limit_regions(unsigned long long size)
431 {
432 unsigned long long current_addr = 0;
433 int i;
435 if (efi_enabled) {
436 efi_memory_desc_t *md;
437 void *p;
439 for (p = memmap.map, i = 0; p < memmap.map_end;
440 p += memmap.desc_size, i++) {
441 md = p;
442 current_addr = md->phys_addr + (md->num_pages << 12);
443 if (md->type == EFI_CONVENTIONAL_MEMORY) {
444 if (current_addr >= size) {
445 md->num_pages -=
446 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
447 memmap.nr_map = i + 1;
448 return;
449 }
450 }
451 }
452 }
453 for (i = 0; i < e820.nr_map; i++) {
454 current_addr = e820.map[i].addr + e820.map[i].size;
455 if (current_addr < size)
456 continue;
458 if (e820.map[i].type != E820_RAM)
459 continue;
461 if (e820.map[i].addr >= size) {
462 /*
463 * This region starts past the end of the
464 * requested size, skip it completely.
465 */
466 e820.nr_map = i;
467 } else {
468 e820.nr_map = i + 1;
469 e820.map[i].size -= current_addr - size;
470 }
471 return;
472 }
473 #ifdef CONFIG_XEN
474 if (i==e820.nr_map && current_addr < size) {
475 /*
476 * The e820 map finished before our requested size so
477 * extend the final entry to the requested address.
478 */
479 --i;
480 if (e820.map[i].type == E820_RAM)
481 e820.map[i].size -= current_addr - size;
482 else
483 add_memory_region(current_addr, size - current_addr, E820_RAM);
484 }
485 #endif
486 }
488 #define E820_DEBUG 1
490 static void __init print_memory_map(char *who)
491 {
492 int i;
494 for (i = 0; i < e820.nr_map; i++) {
495 printk(" %s: %016Lx - %016Lx ", who,
496 e820.map[i].addr,
497 e820.map[i].addr + e820.map[i].size);
498 switch (e820.map[i].type) {
499 case E820_RAM: printk("(usable)\n");
500 break;
501 case E820_RESERVED:
502 printk("(reserved)\n");
503 break;
504 case E820_ACPI:
505 printk("(ACPI data)\n");
506 break;
507 case E820_NVS:
508 printk("(ACPI NVS)\n");
509 break;
510 default: printk("type %lu\n", e820.map[i].type);
511 break;
512 }
513 }
514 }
516 /*
517 * Sanitize the BIOS e820 map.
518 *
519 * Some e820 responses include overlapping entries. The following
520 * replaces the original e820 map with a new one, removing overlaps.
521 *
522 */
523 struct change_member {
524 struct e820entry *pbios; /* pointer to original bios entry */
525 unsigned long long addr; /* address for this change point */
526 };
527 static struct change_member change_point_list[2*E820MAX] __initdata;
528 static struct change_member *change_point[2*E820MAX] __initdata;
529 static struct e820entry *overlap_list[E820MAX] __initdata;
530 static struct e820entry new_bios[E820MAX] __initdata;
/*
 * sanitize_e820_map - rewrite @biosmap in place as a non-overlapping map.
 * @biosmap:  array of *@pnr_map entries; overwritten with the new map
 * @pnr_map:  in: number of entries in @biosmap; out: number of entries kept
 *
 * Returns 0 on success.  Returns -1, leaving @biosmap untouched, when the
 * map has fewer than two entries or when any entry's addr + size wraps.
 *
 * Method: every non-empty entry contributes a start and an end "change
 * point"; the points are sorted by address and then swept in order while
 * tracking the set of entries covering the current address.  Wherever the
 * effective type changes, the open output entry is closed and a new one is
 * started.  Where entries overlap, the numerically largest type wins.
 */
532 int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
533 {
534 struct change_member *change_tmp;
535 unsigned long current_type, last_type;
536 unsigned long long last_addr;
537 int chgidx, still_changing;
538 int overlap_entries;
539 int new_bios_entry;
540 int old_nr, new_nr, chg_nr;
541 int i;
543 /*
544 Visually we're performing the following (1,2,3,4 = memory types)...
546 Sample memory map (w/overlaps):
547 ____22__________________
548 ______________________4_
549 ____1111________________
550 _44_____________________
551 11111111________________
552 ____________________33__
553 ___________44___________
554 __________33333_________
555 ______________22________
556 ___________________2222_
557 _________111111111______
558 _____________________11_
559 _________________4______
561 Sanitized equivalent (no overlap):
562 1_______________________
563 _44_____________________
564 ___1____________________
565 ____22__________________
566 ______11________________
567 _________1______________
568 __________3_____________
569 ___________44___________
570 _____________33_________
571 _______________2________
572 ________________1_______
573 _________________4______
574 ___________________2____
575 ____________________33__
576 ______________________4_
577 */
579 /* if there's only one memory region, don't bother */
580 if (*pnr_map < 2)
581 return -1;
583 old_nr = *pnr_map;
585 /* bail out if we find any unreasonable addresses in bios map */
586 for (i=0; i<old_nr; i++)
587 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
588 return -1;
590 /* create pointers for initial change-point information (for sorting) */
591 for (i=0; i < 2*old_nr; i++)
592 change_point[i] = &change_point_list[i];
594 /* record all known change-points (starting and ending addresses),
595 omitting those that are for empty memory regions */
596 chgidx = 0;
597 for (i=0; i < old_nr; i++) {
598 if (biosmap[i].size != 0) {
599 change_point[chgidx]->addr = biosmap[i].addr;
600 change_point[chgidx++]->pbios = &biosmap[i];
601 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
602 change_point[chgidx++]->pbios = &biosmap[i];
603 }
604 }
605 chg_nr = chgidx; /* true number of change-points */
607 /* sort change-point list by memory addresses (low -> high) */
/* Repeated adjacent-swap passes until a pass makes no swap.  Only the
   pointers in change_point[] are reordered, not change_point_list[]. */
608 still_changing = 1;
609 while (still_changing) {
610 still_changing = 0;
611 for (i=1; i < chg_nr; i++) {
612 /* if <current_addr> > <last_addr>, swap */
613 /* or, if current=<start_addr> & last=<end_addr>, swap */
/* i.e. at equal addresses a start point is ordered before an end point */
614 if ((change_point[i]->addr < change_point[i-1]->addr) ||
615 ((change_point[i]->addr == change_point[i-1]->addr) &&
616 (change_point[i]->addr == change_point[i]->pbios->addr) &&
617 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
618 )
619 {
620 change_tmp = change_point[i];
621 change_point[i] = change_point[i-1];
622 change_point[i-1] = change_tmp;
623 still_changing=1;
624 }
625 }
626 }
628 /* create a new bios memory map, removing overlaps */
629 overlap_entries=0; /* number of entries in the overlap table */
630 new_bios_entry=0; /* index for creating new bios map entries */
631 last_type = 0; /* start with undefined memory type */
632 last_addr = 0; /* start with 0 as last starting address */
633 /* loop through change-points, determining effect on the new bios map */
634 for (chgidx=0; chgidx < chg_nr; chgidx++)
635 {
636 /* keep track of all overlapping bios entries */
/* a change point whose address equals its entry's addr is a start point */
637 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
638 {
639 /* add map entry to overlap list (> 1 entry implies an overlap) */
640 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
641 }
642 else
643 {
644 /* remove entry from list (order independent, so swap with last) */
645 for (i=0; i<overlap_entries; i++)
646 {
647 if (overlap_list[i] == change_point[chgidx]->pbios)
648 overlap_list[i] = overlap_list[overlap_entries-1];
649 }
650 overlap_entries--;
651 }
652 /* if there are overlapping entries, decide which "type" to use */
653 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
654 current_type = 0;
655 for (i=0; i<overlap_entries; i++)
656 if (overlap_list[i]->type > current_type)
657 current_type = overlap_list[i]->type;
658 /* continue building up new bios map based on this information */
659 if (current_type != last_type) {
/* close the entry that was open up to this address, if any */
660 if (last_type != 0) {
661 new_bios[new_bios_entry].size =
662 change_point[chgidx]->addr - last_addr;
663 /* move forward only if the new size was non-zero */
664 if (new_bios[new_bios_entry].size != 0)
665 if (++new_bios_entry >= E820MAX)
666 break; /* no more space left for new bios entries */
667 }
/* open a new entry unless this address starts an uncovered gap */
668 if (current_type != 0) {
669 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
670 new_bios[new_bios_entry].type = current_type;
671 last_addr=change_point[chgidx]->addr;
672 }
673 last_type = current_type;
674 }
675 }
676 new_nr = new_bios_entry; /* retain count for new bios entries */
678 /* copy new bios mapping into original location */
679 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
680 *pnr_map = new_nr;
682 return 0;
683 }
685 /*
686 * Copy the BIOS e820 map into a safe place.
687 *
688 * Sanity-check it while we're at it..
689 *
690 * If we're lucky and live on a modern system, the setup code
691 * will have given us a memory map that we can use to properly
692 * set up memory. If we aren't, we'll fake a memory map.
693 *
694 * We check to see that the memory map contains at least 2 elements
695 * before we'll use it, because the detection code in setup.S may
696 * not be perfect and most every PC known to man has two memory
697 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
698 * thinkpad 560x, for example, does not cooperate with the memory
699 * detection code.)
700 */
701 int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
702 {
703 #ifndef CONFIG_XEN
704 /* Only one memory region (or negative)? Ignore it */
705 if (nr_map < 2)
706 return -1;
707 #else
708 BUG_ON(nr_map < 1);
709 #endif
711 do {
712 unsigned long long start = biosmap->addr;
713 unsigned long long size = biosmap->size;
714 unsigned long long end = start + size;
715 unsigned long type = biosmap->type;
717 /* Overflow in 64 bits? Ignore the memory map. */
718 if (start > end)
719 return -1;
721 #ifndef CONFIG_XEN
722 /*
723 * Some BIOSes claim RAM in the 640k - 1M region.
724 * Not right. Fix it up.
725 */
726 if (type == E820_RAM) {
727 if (start < 0x100000ULL && end > 0xA0000ULL) {
728 if (start < 0xA0000ULL)
729 add_memory_region(start, 0xA0000ULL-start, type);
730 if (end <= 0x100000ULL)
731 continue;
732 start = 0x100000ULL;
733 size = end - start;
734 }
735 }
736 #endif
737 add_memory_region(start, size, type);
738 } while (biosmap++,--nr_map);
740 #ifdef CONFIG_XEN
741 if (is_initial_xendomain()) {
742 struct xen_memory_map memmap;
744 memmap.nr_entries = E820MAX;
745 set_xen_guest_handle(memmap.buffer, machine_e820.map);
747 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
748 BUG();
749 machine_e820.nr_map = memmap.nr_entries;
750 } else
751 machine_e820 = e820;
752 #endif
754 return 0;
755 }
/*
 * EDD data and copy_edd(), selected by configuration:
 *  - EDD enabled (built-in or module), not Xen: edd is defined and
 *    copy_edd() fills it from the boot parameters;
 *  - EDD enabled under Xen: edd is defined but no copy_edd() is provided
 *    here;
 *  - EDD disabled: copy_edd() is an empty stub.
 */
757 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
758 struct edd edd;
759 #ifdef CONFIG_EDD_MODULE
760 EXPORT_SYMBOL(edd);
761 #endif
762 #ifndef CONFIG_XEN
763 /**
764 * copy_edd() - Copy the BIOS EDD information
765 * from boot_params into a safe place.
766 *
767 */
768 static inline void copy_edd(void)
769 {
770 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
771 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
772 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
773 edd.edd_info_nr = EDD_NR;
774 }
775 #endif
776 #else
/* EDD support not configured: nothing to copy. */
777 static inline void copy_edd(void)
778 {
779 }
780 #endif
/*
 * parse_cmdline_early - scan the boot command line for options that must
 * take effect before the generic parameter parser runs.
 * @cmdline_p: set to point at command_line on return
 *
 * The command line from xen_start_info is copied into saved_command_line
 * (bounded by the smaller of MAX_GUEST_CMDLINE and COMMAND_LINE_SIZE, and
 * always NUL-terminated).  It is then walked character by character and
 * copied into command_line; options are only recognised at the start of
 * the string or right after a space.  "mem=" and "memmap=" options are
 * consumed here and not copied to command_line.
 *
 * If any option changed the memory map (userdef), the resulting map is
 * printed.
 *
 * NOTE(review): in this listing the brace-only source lines that close the
 * for loop, the if (userdef) body and the function itself (embedded line
 * numbers 1014 and 1020-1021) are missing.
 */
782 static void __init parse_cmdline_early (char ** cmdline_p)
783 {
784 char c = ' ', *to = command_line, *from = saved_command_line;
785 int len = 0, max_cmdline;
786 int userdef = 0;
788 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
789 max_cmdline = COMMAND_LINE_SIZE;
790 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
791 /* Save unparsed command line copy for /proc/cmdline */
792 saved_command_line[max_cmdline-1] = '\0';
794 for (;;) {
/* c is the previously consumed character; only try to match options
   at the start of a word */
795 if (c != ' ')
796 goto next_char;
797 /*
798 * "mem=nopentium" disables the 4MB page tables.
799 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
800 * to <mem>, overriding the bios size.
801 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
802 * <start> to <start>+<mem>, overriding the bios size.
803 *
804 * HPA tells me bootloaders need to parse mem=, so no new
805 * option should be mem= [also see Documentation/i386/boot.txt]
806 */
807 if (!memcmp(from, "mem=", 4)) {
/* drop the separating space already copied to command_line */
808 if (to != command_line)
809 to--;
810 if (!memcmp(from+4, "nopentium", 9)) {
811 from += 9+4;
812 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
813 disable_pse = 1;
814 } else {
815 /* If the user specifies memory size, we
816 * limit the BIOS-provided memory map to
817 * that size. exactmap can be used to specify
818 * the exact map. mem=number can be used to
819 * trim the existing memory map.
820 */
821 unsigned long long mem_size;
823 mem_size = memparse(from+4, &from);
824 limit_regions(mem_size);
825 userdef=1;
826 }
827 }
829 else if (!memcmp(from, "memmap=", 7)) {
830 if (to != command_line)
831 to--;
832 if (!memcmp(from+7, "exactmap", 8)) {
833 #ifdef CONFIG_CRASH_DUMP
834 /* If we are doing a crash dump, we
835 * still need to know the real mem
836 * size before original memory map is
837 * reset.
838 */
839 find_max_pfn();
840 saved_max_pfn = max_pfn;
841 #endif
842 from += 8+7;
/* discard the current map; later memmap= options rebuild it */
843 e820.nr_map = 0;
844 userdef = 1;
845 } else {
846 /* If the user specifies memory size, we
847 * limit the BIOS-provided memory map to
848 * that size. exactmap can be used to specify
849 * the exact map. mem=number can be used to
850 * trim the existing memory map.
851 */
852 unsigned long long start_at, mem_size;
854 mem_size = memparse(from+7, &from);
/* size@start adds RAM, size#start adds ACPI data, size$start adds a
   reserved region; a bare size trims the map like mem= */
855 if (*from == '@') {
856 start_at = memparse(from+1, &from);
857 add_memory_region(start_at, mem_size, E820_RAM);
858 } else if (*from == '#') {
859 start_at = memparse(from+1, &from);
860 add_memory_region(start_at, mem_size, E820_ACPI);
861 } else if (*from == '$') {
862 start_at = memparse(from+1, &from);
863 add_memory_region(start_at, mem_size, E820_RESERVED);
864 } else {
865 limit_regions(mem_size);
866 userdef=1;
867 }
868 }
869 }
871 else if (!memcmp(from, "noexec=", 7))
872 noexec_setup(from + 7);
875 #ifdef CONFIG_X86_MPPARSE
876 /*
877 * If the BIOS enumerates physical processors before logical,
878 * maxcpus=N at enumeration-time can be used to disable HT.
879 */
880 else if (!memcmp(from, "maxcpus=", 8)) {
881 extern unsigned int maxcpus;
883 maxcpus = simple_strtoul(from + 8, NULL, 0);
884 }
885 #endif
887 #ifdef CONFIG_ACPI
888 /* "acpi=off" disables both ACPI table parsing and interpreter */
889 else if (!memcmp(from, "acpi=off", 8)) {
890 disable_acpi();
891 }
893 /* acpi=force to over-ride black-list */
894 else if (!memcmp(from, "acpi=force", 10)) {
895 acpi_force = 1;
896 acpi_ht = 1;
897 acpi_disabled = 0;
898 }
900 /* acpi=strict disables out-of-spec workarounds */
901 else if (!memcmp(from, "acpi=strict", 11)) {
902 acpi_strict = 1;
903 }
905 /* Limit ACPI just to boot-time to enable HT */
906 else if (!memcmp(from, "acpi=ht", 7)) {
907 if (!acpi_force)
908 disable_acpi();
909 acpi_ht = 1;
910 }
912 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
913 else if (!memcmp(from, "pci=noacpi", 10)) {
914 acpi_disable_pci();
915 }
916 /* "acpi=noirq" disables ACPI interrupt routing */
917 else if (!memcmp(from, "acpi=noirq", 10)) {
918 acpi_noirq_set();
919 }
921 else if (!memcmp(from, "acpi_sci=edge", 13))
922 acpi_sci_flags.trigger = 1;
924 else if (!memcmp(from, "acpi_sci=level", 14))
925 acpi_sci_flags.trigger = 3;
927 else if (!memcmp(from, "acpi_sci=high", 13))
928 acpi_sci_flags.polarity = 1;
930 else if (!memcmp(from, "acpi_sci=low", 12))
931 acpi_sci_flags.polarity = 3;
933 #ifdef CONFIG_X86_IO_APIC
934 else if (!memcmp(from, "acpi_skip_timer_override", 24))
935 acpi_skip_timer_override = 1;
/* NOTE(review): the plain "if" below ends the else-if chain started at
   "mem=".  Every "else if" after it hangs off the enable_timer_pin_1
   test, so those options are still checked after an earlier option in
   the chain has matched.  Looks unintended -- confirm before changing. */
937 if (!memcmp(from, "disable_timer_pin_1", 19))
938 disable_timer_pin_1 = 1;
939 if (!memcmp(from, "enable_timer_pin_1", 18))
940 disable_timer_pin_1 = -1;
942 /* disable IO-APIC */
943 else if (!memcmp(from, "noapic", 6))
944 disable_ioapic_setup();
945 #endif /* CONFIG_X86_IO_APIC */
946 #endif /* CONFIG_ACPI */
948 #ifdef CONFIG_X86_LOCAL_APIC
949 /* enable local APIC */
950 else if (!memcmp(from, "lapic", 5))
951 lapic_enable();
953 /* disable local APIC */
/* NOTE(review): only 6 of the 7 characters of "nolapic" are compared, so
   any word starting with "nolapi" matches -- presumably meant to be 7. */
954 else if (!memcmp(from, "nolapic", 6))
955 lapic_disable();
956 #endif /* CONFIG_X86_LOCAL_APIC */
958 #ifdef CONFIG_KEXEC
959 /* crashkernel=size@addr specifies the location to reserve for
960 * a crash kernel. By reserving this memory we guarantee
961 * that linux never sets it up as a DMA target.
962 * Useful for holding code to do something appropriate
963 * after a kernel panic.
964 */
965 else if (!memcmp(from, "crashkernel=", 12)) {
966 #ifndef CONFIG_XEN
967 unsigned long size, base;
968 size = memparse(from+12, &from);
969 if (*from == '@') {
970 base = memparse(from+1, &from);
971 /* FIXME: Do I want a sanity check
972 * to validate the memory range?
973 */
974 crashk_res.start = base;
975 crashk_res.end = base + size - 1;
976 }
977 #else
978 printk("Ignoring crashkernel command line, "
979 "parameter will be supplied by xen\n");
980 #endif
981 }
982 #endif
983 #ifdef CONFIG_PROC_VMCORE
984 /* elfcorehdr= specifies the location of elf core header
985 * stored by the crashed kernel.
986 */
987 else if (!memcmp(from, "elfcorehdr=", 11))
988 elfcorehdr_addr = memparse(from+11, &from);
989 #endif
991 /*
992 * highmem=size forces highmem to be exactly 'size' bytes.
993 * This works even on boxes that have no highmem otherwise.
994 * This also works to reduce highmem size on bigger boxes.
995 */
996 else if (!memcmp(from, "highmem=", 8))
997 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
999 /*
1000 * vmalloc=size forces the vmalloc area to be exactly 'size'
1001 * bytes. This can be used to increase (or decrease) the
1002 * vmalloc area - the default is 128m.
1003 */
1004 else if (!memcmp(from, "vmalloc=", 8))
1005 __VMALLOC_RESERVE = memparse(from+8, &from);
/* copy one character to command_line; stop at the terminating NUL or
   once COMMAND_LINE_SIZE characters have been consumed */
1007 next_char:
1008 c = *(from++);
1009 if (!c)
1010 break;
1011 if (COMMAND_LINE_SIZE <= ++len)
1012 break;
1013 *(to++) = c;
1015 *to = '\0';
1016 *cmdline_p = command_line;
1017 if (userdef) {
1018 printk(KERN_INFO "user-defined physical RAM map:\n");
1019 print_memory_map("user");
1023 /*
1024 * Callback for efi_memory_walk.
1025 */
1026 static int __init
1027 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
1029 unsigned long *max_pfn = arg, pfn;
1031 if (start < end) {
1032 pfn = PFN_UP(end -1);
1033 if (pfn > *max_pfn)
1034 *max_pfn = pfn;
1036 return 0;
1039 static int __init
1040 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1042 memory_present(0, start, end);
1043 return 0;
1046 /*
1047 * This function checks if any part of the range <start,end> is mapped
1048 * with type.
1049 */
1050 int
1051 e820_any_mapped(u64 start, u64 end, unsigned type)
1053 int i;
1055 #ifndef CONFIG_XEN
1056 for (i = 0; i < e820.nr_map; i++) {
1057 const struct e820entry *ei = &e820.map[i];
1058 #else
1059 if (!is_initial_xendomain())
1060 return 0;
1061 for (i = 0; i < machine_e820.nr_map; ++i) {
1062 const struct e820entry *ei = &machine_e820.map[i];
1063 #endif
1065 if (type && ei->type != type)
1066 continue;
1067 if (ei->addr >= end || ei->addr + ei->size <= start)
1068 continue;
1069 return 1;
1071 return 0;
1073 EXPORT_SYMBOL_GPL(e820_any_mapped);
1075 /*
1076 * This function checks if the entire range <start,end> is mapped with type.
1078 * Note: this function only works correct if the e820 table is sorted and
1079 * not-overlapping, which is the case
1080 */
1081 int __init
1082 e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
1084 u64 start = s;
1085 u64 end = e;
1086 int i;
1088 #ifndef CONFIG_XEN
1089 for (i = 0; i < e820.nr_map; i++) {
1090 struct e820entry *ei = &e820.map[i];
1091 #else
1092 if (!is_initial_xendomain())
1093 return 0;
1094 for (i = 0; i < machine_e820.nr_map; ++i) {
1095 const struct e820entry *ei = &machine_e820.map[i];
1096 #endif
1097 if (type && ei->type != type)
1098 continue;
1099 /* is the region (part) in overlap with the current region ?*/
1100 if (ei->addr >= end || ei->addr + ei->size <= start)
1101 continue;
1102 /* if the region is at the beginning of <start,end> we move
1103 * start to the end of the region since it's ok until there
1104 */
1105 if (ei->addr <= start)
1106 start = ei->addr + ei->size;
1107 /* if start is now at or beyond end, we're done, full
1108 * coverage */
1109 if (start >= end)
1110 return 1; /* we're done */
1112 return 0;
1115 /*
1116 * Find the highest page frame number we have available
1117 */
1118 void __init find_max_pfn(void)
1120 int i;
1122 max_pfn = 0;
1123 if (efi_enabled) {
1124 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1125 efi_memmap_walk(efi_memory_present_wrapper, NULL);
1126 return;
1129 for (i = 0; i < e820.nr_map; i++) {
1130 unsigned long start, end;
1131 /* RAM? */
1132 if (e820.map[i].type != E820_RAM)
1133 continue;
1134 start = PFN_UP(e820.map[i].addr);
1135 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1136 if (start >= end)
1137 continue;
1138 if (end > max_pfn)
1139 max_pfn = end;
1140 memory_present(0, start, end);
1144 /*
1145 * Determine low and high memory ranges:
1146 */
/*
 * Returns the highest page frame number to be treated as low memory.
 * As side effects the globals max_pfn and highmem_pages may be adjusted.
 * NOTE(review): highmem_pages == -1 is handled as a "no explicit size"
 * sentinel here - confirm against where highmem_pages is initialised.
 */
1147 unsigned long __init find_max_low_pfn(void)
1149 unsigned long max_low_pfn;
1151 max_low_pfn = max_pfn;
/* Case 1: more memory than fits below MAXMEM_PFN. */
1152 if (max_low_pfn > MAXMEM_PFN) {
1153 if (highmem_pages == -1)
1154 highmem_pages = max_pfn - MAXMEM_PFN;
/* Less highmem requested than exists: drop the excess from max_pfn. */
1155 if (highmem_pages + MAXMEM_PFN < max_pfn)
1156 max_pfn = MAXMEM_PFN + highmem_pages;
/* More highmem requested than exists: report it and zero the request. */
1157 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1158 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1159 highmem_pages = 0;
1161 max_low_pfn = MAXMEM_PFN;
1162 #ifndef CONFIG_HIGHMEM
1163 /* Maximum memory usable is what is directly addressable */
1164 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1165 MAXMEM>>20);
1166 if (max_pfn > MAX_NONPAE_PFN)
1167 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1168 else
1169 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1170 max_pfn = MAXMEM_PFN;
1171 #else /* !CONFIG_HIGHMEM */
1172 #ifndef CONFIG_X86_PAE
/* Highmem kernel without PAE: cap max_pfn at MAX_NONPAE_PFN. */
1173 if (max_pfn > MAX_NONPAE_PFN) {
1174 max_pfn = MAX_NONPAE_PFN;
1175 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1176 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1178 #endif /* !CONFIG_X86_PAE */
1179 #endif /* !CONFIG_HIGHMEM */
/* Case 2: all memory fits at or below MAXMEM_PFN. */
1180 } else {
1181 if (highmem_pages == -1)
1182 highmem_pages = 0;
1183 #ifdef CONFIG_HIGHMEM
1184 if (highmem_pages >= max_pfn) {
1185 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1186 highmem_pages = 0;
/*
 * A non-zero highmem request is taken off the top of low memory, but
 * only if at least 64MB of low memory would remain.
 */
1188 if (highmem_pages) {
1189 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1190 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1191 highmem_pages = 0;
1193 max_low_pfn -= highmem_pages;
1195 #else
1196 if (highmem_pages)
1197 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1198 #endif
1200 return max_low_pfn;
1203 /*
1204 * Free all available memory for boot time allocation. Used
1205 * as a callback function by efi_memory_walk()
1206 */
1208 static int __init
1209 free_available_memory(unsigned long start, unsigned long end, void *arg)
1211 /* check max_low_pfn */
1212 if (start >= (max_low_pfn << PAGE_SHIFT))
1213 return 0;
1214 if (end >= (max_low_pfn << PAGE_SHIFT))
1215 end = max_low_pfn << PAGE_SHIFT;
1216 if (start < end)
1217 free_bootmem(start, end - start);
1219 return 0;
1221 /*
1222 * Register fully available low RAM pages with the bootmem allocator.
1223 */
1224 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1226 int i;
1228 if (efi_enabled) {
1229 efi_memmap_walk(free_available_memory, NULL);
1230 return;
1232 for (i = 0; i < e820.nr_map; i++) {
1233 unsigned long curr_pfn, last_pfn, size;
1234 /*
1235 * Reserve usable low memory
1236 */
1237 if (e820.map[i].type != E820_RAM)
1238 continue;
1239 /*
1240 * We are rounding up the start address of usable memory:
1241 */
1242 curr_pfn = PFN_UP(e820.map[i].addr);
1243 if (curr_pfn >= max_low_pfn)
1244 continue;
1245 /*
1246 * ... and at the end of the usable range downwards:
1247 */
1248 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1250 #ifdef CONFIG_XEN
1251 /*
1252 * Truncate to the number of actual pages currently
1253 * present.
1254 */
1255 if (last_pfn > xen_start_info->nr_pages)
1256 last_pfn = xen_start_info->nr_pages;
1257 #endif
1259 if (last_pfn > max_low_pfn)
1260 last_pfn = max_low_pfn;
1262 /*
1263 * .. finally, did all the rounding and playing
1264 * around just make the area go away?
1265 */
1266 if (last_pfn <= curr_pfn)
1267 continue;
1269 size = last_pfn - curr_pfn;
1270 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1274 #ifndef CONFIG_XEN
1275 /*
1276 * workaround for Dell systems that neglect to reserve EBDA
1277 */
1278 static void __init reserve_ebda_region(void)
1280 unsigned int addr;
1281 addr = get_bios_ebda();
1282 if (addr)
1283 reserve_bootmem(addr, PAGE_SIZE);
1285 #endif
1287 #ifndef CONFIG_NEED_MULTIPLE_NODES
1288 void __init setup_bootmem_allocator(void);
1289 static unsigned long __init setup_memory(void)
1291 /*
1292 * partially used pages are not usable - thus
1293 * we are rounding upwards:
1294 */
1295 min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
1296 xen_start_info->nr_pt_frames;
1298 find_max_pfn();
1300 max_low_pfn = find_max_low_pfn();
1302 #ifdef CONFIG_HIGHMEM
1303 highstart_pfn = highend_pfn = max_pfn;
1304 if (max_pfn > max_low_pfn) {
1305 highstart_pfn = max_low_pfn;
1307 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1308 pages_to_mb(highend_pfn - highstart_pfn));
1309 #endif
1310 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1311 pages_to_mb(max_low_pfn));
1313 setup_bootmem_allocator();
1315 return max_low_pfn;
1318 void __init zone_sizes_init(void)
1320 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1321 unsigned int max_dma, low;
1323 max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
1324 low = max_low_pfn;
1326 if (low < max_dma)
1327 zones_size[ZONE_DMA] = low;
1328 else {
1329 zones_size[ZONE_DMA] = max_dma;
1330 zones_size[ZONE_NORMAL] = low - max_dma;
1331 #ifdef CONFIG_HIGHMEM
1332 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1333 #endif
1335 free_area_init(zones_size);
1337 #else
1338 extern unsigned long __init setup_memory(void);
1339 extern void zone_sizes_init(void);
1340 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
/*
 * Start the boot-time allocator: call init_bootmem(min_low_pfn,
 * max_low_pfn), free the usable low RAM into it, then reserve the ranges
 * that must not be handed out.  Also records the initrd location and sets
 * up kexec resources when those options are configured.
 */
1342 void __init setup_bootmem_allocator(void)
1344 unsigned long bootmap_size;
1345 /*
1346 * Initialize the boot-time allocator (with low memory only):
1347 */
1348 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1350 register_bootmem_low_pages(max_low_pfn);
1352 /*
1353 * Reserve the bootmem bitmap itself as well. We do this in two
1354 * steps (first step was init_bootmem()) because this catches
1355 * the (very unlikely) case of us accidentally initializing the
1356 * bootmem allocator with an invalid RAM area.
1357 */
/*
 * The reserved range starts at __PHYSICAL_START and ends bootmap_size
 * bytes past the physical address of min_low_pfn; the PAGE_SIZE-1 term
 * pads the length by just under one page.
 */
1358 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1359 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
/* The BIOS-related reservations below are compiled out under Xen. */
1361 #ifndef CONFIG_XEN
1362 /*
1363 * reserve physical page 0 - it's a special BIOS page on many boxes,
1364 * enabling clean reboots, SMP operation, laptop functions.
1365 */
1366 reserve_bootmem(0, PAGE_SIZE);
1368 /* reserve EBDA region, it's a 4K region */
1369 reserve_ebda_region();
1371 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1372 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1373 unless you have no PS/2 mouse plugged in. */
1374 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1375 boot_cpu_data.x86 == 6)
1376 reserve_bootmem(0xa0000 - 4096, 4096);
1378 #ifdef CONFIG_SMP
1379 /*
1380 * But first pinch a few for the stack/trampoline stuff
1381 * FIXME: Don't need the extra page at 4K, but need to fix
1382 * trampoline before removing it. (see the GDT stuff)
1383 */
1384 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1385 #endif
1386 #ifdef CONFIG_ACPI_SLEEP
1387 /*
1388 * Reserve low memory region for sleep support.
1389 */
1390 acpi_reserve_bootmem();
1391 #endif
1392 #endif /* !CONFIG_XEN */
1394 #ifdef CONFIG_BLK_DEV_INITRD
/*
 * An initrd is present when xen_start_info->mod_start is non-zero.  It is
 * only accepted if it ends at or below the low memory limit.  The range is
 * not reserved here (the reserve_bootmem() call is commented out); only
 * initrd_start/initrd_end are recorded.
 */
1395 if (xen_start_info->mod_start) {
1396 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1397 /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1398 initrd_start = INITRD_START + PAGE_OFFSET;
1399 initrd_end = initrd_start+INITRD_SIZE;
1400 initrd_below_start_ok = 1;
1402 else {
1403 printk(KERN_ERR "initrd extends beyond end of memory "
1404 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1405 INITRD_START + INITRD_SIZE,
1406 max_low_pfn << PAGE_SHIFT);
1407 initrd_start = 0;
1410 #endif
1411 #ifdef CONFIG_KEXEC
1412 #ifdef CONFIG_XEN
1413 xen_machine_kexec_setup_resources();
1414 #else
/* Reserve the crash kernel region only when crashk_res is non-empty. */
1415 if (crashk_res.start != crashk_res.end)
1416 reserve_bootmem(crashk_res.start,
1417 crashk_res.end - crashk_res.start + 1);
1418 #endif
1419 #endif
1422 /*
1423 * The node 0 pgdat is initialized before all of these because
1424 * it's needed for bootmem. node>0 pgdats have their virtual
1425 * space allocated before the pagetables are in place to access
1426 * them, so they can't be cleared then.
1428 * This should all compile down to nothing when NUMA is off.
1429 */
1430 void __init remapped_pgdat_init(void)
1432 int nid;
1434 for_each_online_node(nid) {
1435 if (nid != 0)
1436 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1440 /*
1441 * Request address space for all standard RAM and ROM resources
1442 * and also for regions reported as reserved by the e820.
1443 */
1444 static void __init
1445 legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
1446 struct resource *code_resource,
1447 struct resource *data_resource)
1449 int i;
1451 probe_roms();
1453 for (i = 0; i < nr_map; i++) {
1454 struct resource *res;
1455 #ifndef CONFIG_RESOURCES_64BIT
1456 if (e820[i].addr + e820[i].size > 0x100000000ULL)
1457 continue;
1458 #endif
1459 res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
1460 switch (e820[i].type) {
1461 case E820_RAM: res->name = "System RAM"; break;
1462 case E820_ACPI: res->name = "ACPI Tables"; break;
1463 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1464 default: res->name = "reserved";
1466 res->start = e820[i].addr;
1467 res->end = res->start + e820[i].size - 1;
1468 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1469 if (request_resource(&iomem_resource, res)) {
1470 kfree(res);
1471 continue;
1473 if (e820[i].type == E820_RAM) {
1474 /*
1475 * We don't know which RAM region contains kernel data,
1476 * so we try it repeatedly and let the resource manager
1477 * test it.
1478 */
1479 #ifndef CONFIG_XEN
1480 request_resource(res, code_resource);
1481 request_resource(res, data_resource);
1482 #endif
1483 #ifdef CONFIG_KEXEC
1484 if (crashk_res.start != crashk_res.end)
1485 request_resource(res, &crashk_res);
1486 #ifdef CONFIG_XEN
1487 xen_machine_kexec_register_resources(res);
1488 #endif
1489 #endif
1494 /*
1495 * Locate a unused range of the physical address space below 4G which
1496 * can be used for PCI mappings.
1497 */
1498 static void __init
1499 e820_setup_gap(struct e820entry *e820, int nr_map)
1501 unsigned long gapstart, gapsize, round;
1502 unsigned long long last;
1503 int i;
1505 /*
1506 * Search for the bigest gap in the low 32 bits of the e820
1507 * memory space.
1508 */
1509 last = 0x100000000ull;
1510 gapstart = 0x10000000;
1511 gapsize = 0x400000;
1512 i = nr_map;
1513 while (--i >= 0) {
1514 unsigned long long start = e820[i].addr;
1515 unsigned long long end = start + e820[i].size;
1517 /*
1518 * Since "last" is at most 4GB, we know we'll
1519 * fit in 32 bits if this condition is true
1520 */
1521 if (last > end) {
1522 unsigned long gap = last - end;
1524 if (gap > gapsize) {
1525 gapsize = gap;
1526 gapstart = end;
1529 if (start < last)
1530 last = start;
1533 /*
1534 * See how much we want to round up: start off with
1535 * rounding to the next 1MB area.
1536 */
1537 round = 0x100000;
1538 while ((gapsize >> 4) > round)
1539 round += round;
1540 /* Fun with two's complement */
1541 pci_mem_start = (gapstart + round) & -round;
1543 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1544 pci_mem_start, gapstart, gapsize);
1547 /*
1548 * Request address space for all standard resources
1550 * This is called just before pcibios_init(), which is also a
1551 * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
1552 */
1553 static int __init request_standard_resources(void)
1555 int i;
1557 /* Nothing to do if not running in dom0. */
1558 if (!is_initial_xendomain())
1559 return 0;
1561 printk("Setting up standard PCI resources\n");
1562 #ifdef CONFIG_XEN
1563 legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
1564 &code_resource, &data_resource);
1565 #else
1566 if (efi_enabled)
1567 efi_initialize_iomem_resources(&code_resource, &data_resource);
1568 else
1569 legacy_init_iomem_resources(e820.map, e820.nr_map,
1570 &code_resource, &data_resource);
1571 #endif
1573 /* EFI systems may still have VGA */
1574 request_resource(&iomem_resource, &video_ram_resource);
1576 /* request I/O space for devices used on all i[345]86 PCs */
1577 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1578 request_resource(&ioport_resource, &standard_io_resources[i]);
1579 return 0;
1582 subsys_initcall(request_standard_resources);
1584 static void __init register_memory(void)
1586 #ifdef CONFIG_XEN
1587 if (is_initial_xendomain())
1588 e820_setup_gap(machine_e820.map, machine_e820.nr_map);
1589 else
1590 #endif
1591 e820_setup_gap(e820.map, e820.nr_map);
/*
 * Store x in MCA_bus when CONFIG_MCA is set; does nothing otherwise.
 */
static void set_mca_bus(int x)
{
#ifdef CONFIG_MCA
	MCA_bus = x;
#endif
}
1603 /*
1604 * Determine if we were loaded by an EFI loader. If so, then we have also been
1605 * passed the efi memmap, systab, etc., so we should use these data structures
1606 * for initialization. Note, the efi init code path is determined by the
1607 * global efi_enabled. This allows the same kernel image to be used on existing
1608 * systems (with a traditional BIOS) as well as on EFI systems.
1609 */
/*
 * Xen variant of the i386 architecture setup.  In order it: installs the
 * panic notifier and enables hypervisor assists, copies boot parameters
 * into kernel globals, obtains the memory map, sets up bootmem and paging,
 * builds the phys-to-machine (P->M) table, claims the ISA DMA channels,
 * runs ACPI/SMP discovery, picks the PCI gap, chooses a console and calls
 * tsc_init().
 *
 * cmdline_p is handed to parse_cmdline_early(); *cmdline_p is later
 * searched for "earlyprintk=" and passed to generic_apic_probe().
 */
1610 void __init setup_arch(char **cmdline_p)
1612 int i, j, k, fpp;
1613 struct physdev_set_iopl set_iopl;
1614 unsigned long max_low_pfn;
1615 unsigned long p2m_pages;
1617 /* Force a quick death if the kernel panics (not domain 0). */
1618 extern int panic_timeout;
1619 if (!panic_timeout && !is_initial_xendomain())
1620 panic_timeout = 1;
1622 /* Register a call for panic conditions. */
1623 atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
/* A failure to enable either assist only produces a warning. */
1625 WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
1626 VMASST_TYPE_4gb_segments));
1627 WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
1628 VMASST_TYPE_writable_pagetables));
1630 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1631 pre_setup_arch_hook();
1632 early_cpu_init();
1633 #ifdef CONFIG_SMP
1634 prefill_possible_map();
1635 #endif
1637 /*
1638 * FIXME: This isn't an official loader_type right
1639 * now but does currently work with elilo.
1640 * If we were configured as an EFI kernel, check to make
1641 * sure that we were loaded correctly from elilo and that
1642 * the system table is valid. If not, then initialize normally.
1643 */
1644 #ifdef CONFIG_EFI
1645 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1646 efi_enabled = 1;
1647 #endif
1649 /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1650 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1651 */
1652 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
/* Copy boot-time parameters into their kernel globals. */
1653 drive_info = DRIVE_INFO;
1654 screen_info = SCREEN_INFO;
1655 copy_edid();
1656 apm_info.bios = APM_BIOS_INFO;
1657 ist_info = IST_INFO;
1658 saved_videomode = VIDEO_MODE;
1659 if( SYS_DESC_TABLE.length != 0 ) {
1660 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1661 machine_id = SYS_DESC_TABLE.table[0];
1662 machine_submodel_id = SYS_DESC_TABLE.table[1];
1663 BIOS_revision = SYS_DESC_TABLE.table[2];
1665 bootloader_type = LOADER_TYPE;
/*
 * In the initial domain the VGA console description is located info_off
 * bytes past xen_start_info and passed to dom0_init_screen_info(); the
 * domU console fields are then zeroed.  Any other domain just clears
 * orig_video_isVGA.
 */
1667 if (is_initial_xendomain()) {
1668 const struct dom0_vga_console_info *info =
1669 (void *)((char *)xen_start_info +
1670 xen_start_info->console.dom0.info_off);
1672 dom0_init_screen_info(info,
1673 xen_start_info->console.dom0.info_size);
1674 xen_start_info->console.domU.mfn = 0;
1675 xen_start_info->console.domU.evtchn = 0;
1676 } else
1677 screen_info.orig_video_isVGA = 0;
1679 #ifdef CONFIG_BLK_DEV_RAM
1680 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1681 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1682 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1683 #endif
/* ARCH_SETUP is a macro defined outside this excerpt. */
1685 ARCH_SETUP
1686 if (efi_enabled)
1687 efi_init();
1688 else {
1689 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1690 print_memory_map(machine_specific_memory_setup());
1693 copy_edd();
1695 if (!MOUNT_ROOT_RDONLY)
1696 root_mountflags &= ~MS_RDONLY;
1697 init_mm.start_code = (unsigned long) _text;
1698 init_mm.end_code = (unsigned long) _etext;
1699 init_mm.end_data = (unsigned long) _edata;
/* The initial brk is placed just past the nr_pt_frames frames at pt_base. */
1700 init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
1701 xen_start_info->nr_pt_frames) << PAGE_SHIFT;
1703 code_resource.start = virt_to_phys(_text);
1704 code_resource.end = virt_to_phys(_etext)-1;
1705 data_resource.start = virt_to_phys(_etext);
1706 data_resource.end = virt_to_phys(_edata)-1;
1708 parse_cmdline_early(cmdline_p);
1710 #ifdef CONFIG_EARLY_PRINTK
1712 char *s = strstr(*cmdline_p, "earlyprintk=");
1713 if (s) {
1714 setup_early_printk(strchr(s, '=') + 1);
1715 printk("early console enabled\n");
1718 #endif
1720 max_low_pfn = setup_memory();
1722 /*
1723 * NOTE: before this point _nobody_ is allowed to allocate
1724 * any memory using the bootmem allocator. Although the
1725 * allocator is now initialised only the first 8Mb of the kernel
1726 * virtual address space has been mapped. All allocations before
1727 * paging_init() has completed must use the alloc_bootmem_low_pages()
1728 * variant (which allocates DMA'able memory) and care must be taken
1729 * not to exceed the 8Mb limit.
1730 */
1732 #ifdef CONFIG_SMP
1733 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1734 #endif
1735 paging_init();
1736 remapped_pgdat_init();
1737 sparse_init();
1738 zone_sizes_init();
1740 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1741 /*
1742 * Find and reserve possible boot-time SMP configuration:
1743 */
1744 find_smp_config();
1745 #endif
/*
 * p2m_pages is the number of entries later copied from the start-of-day
 * mfn_list into the new P->M table.  It ends up as the smaller of max_pfn
 * and xen_start_info->nr_pages.
 */
1747 p2m_pages = max_pfn;
1748 if (xen_start_info->nr_pages > max_pfn) {
1749 /*
1750 * the max_pfn was shrunk (probably by mem= or highmem=
1751 * kernel parameter); shrink reservation with the HV
1752 */
1753 struct xen_memory_reservation reservation = {
1754 .address_bits = 0,
1755 .extent_order = 0,
1756 .domid = DOMID_SELF
1757 };
1758 unsigned int difference;
1759 int ret;
1761 difference = xen_start_info->nr_pages - max_pfn;
/* The extents handed back are the mfn_list entries from index max_pfn on. */
1763 set_xen_guest_handle(reservation.extent_start,
1764 ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
1765 reservation.nr_extents = difference;
1766 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
1767 &reservation);
/* Any result other than the full requested count is treated as fatal. */
1768 BUG_ON (ret != difference);
1770 else if (max_pfn > xen_start_info->nr_pages)
1771 p2m_pages = xen_start_info->nr_pages;
1773 /* Make sure we have a correctly sized P->M table. */
1774 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/*
 * Allocate max_pfn entries, fill them with all-ones, then overwrite the
 * first p2m_pages entries from the start-of-day list.  Entries past
 * p2m_pages keep the all-ones fill.  The original list is freed afterwards.
 */
1775 phys_to_machine_mapping = alloc_bootmem_low_pages(
1776 max_pfn * sizeof(unsigned long));
1777 memset(phys_to_machine_mapping, ~0,
1778 max_pfn * sizeof(unsigned long));
1779 memcpy(phys_to_machine_mapping,
1780 (unsigned long *)xen_start_info->mfn_list,
1781 p2m_pages * sizeof(unsigned long));
1782 free_bootmem(
1783 __pa(xen_start_info->mfn_list),
1784 PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
1785 sizeof(unsigned long))));
1787 /*
1788 * Initialise the list of the frames that specify the list of
1789 * frames that make up the p2m table. Used by save/restore
1790 */
1791 pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
/*
 * fpp is the number of unsigned longs per page.  Each loop step covers one
 * page of phys_to_machine_mapping (i advances by fpp) and stores that
 * page's MFN in slot j of the current frame-list page k.  Every fpp slots
 * a new frame-list page is allocated and its MFN is recorded in
 * pfn_to_mfn_frame_list_list.  The BUG_ON caps k at 16 frame-list pages.
 */
1793 fpp = PAGE_SIZE/sizeof(unsigned long);
1794 for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
1795 if ((j % fpp) == 0) {
1796 k++;
1797 BUG_ON(k>=16);
1798 pfn_to_mfn_frame_list[k] =
1799 alloc_bootmem_low_pages(PAGE_SIZE);
1800 pfn_to_mfn_frame_list_list[k] =
1801 virt_to_mfn(pfn_to_mfn_frame_list[k]);
1802 j=0;
1804 pfn_to_mfn_frame_list[k][j] =
1805 virt_to_mfn(&phys_to_machine_mapping[i]);
/* Publish the table size and the top-level frame list in the shared info page. */
1807 HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
1808 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
1809 virt_to_mfn(pfn_to_mfn_frame_list_list);
1812 /* Mark all ISA DMA channels in-use - using them wouldn't work. */
/* NOTE(review): channel 4 is skipped - presumably the cascade channel; confirm. */
1813 for (i = 0; i < MAX_DMA_CHANNELS; ++i)
1814 if (i != 4 && request_dma(i, "xen") != 0)
1815 BUG();
1817 /*
1818 * NOTE: at this point the bootmem allocator is fully available.
1819 */
1821 if (is_initial_xendomain())
1822 dmi_scan_machine();
1824 #ifdef CONFIG_X86_GENERICARCH
1825 generic_apic_probe(*cmdline_p);
1826 #endif
1827 if (efi_enabled)
1828 efi_map_memmap();
/* Request I/O privilege level 1 from the hypervisor; failure only warns. */
1830 set_iopl.iopl = 1;
1831 WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
1833 #ifdef CONFIG_ACPI
1834 if (!is_initial_xendomain()) {
1835 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1836 acpi_disabled = 1;
1837 acpi_ht = 0;
1840 /*
1841 * Parse the ACPI tables for possible boot-time SMP configuration.
1842 */
1843 acpi_boot_table_init();
1844 #endif
1846 #ifdef CONFIG_X86_IO_APIC
1847 check_acpi_pci(); /* Checks more than just ACPI actually */
1848 #endif
1850 #ifdef CONFIG_ACPI
1851 acpi_boot_init();
1853 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1854 if (def_to_bigsmp)
1855 printk(KERN_WARNING "More than 8 CPUs detected and "
1856 "CONFIG_X86_PC cannot handle it.\nUse "
1857 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1858 #endif
1859 #endif
1860 #ifdef CONFIG_X86_LOCAL_APIC
1861 if (smp_found_config)
1862 get_smp_config();
1863 #endif
1865 register_memory();
/*
 * Console selection: the initial domain gets the VGA console (or the dummy
 * console when VGA console support is not built in); other domains get the
 * dummy console when it is configured.
 */
1867 if (is_initial_xendomain()) {
1868 #ifdef CONFIG_VT
1869 #if defined(CONFIG_VGA_CONSOLE)
1870 if (!efi_enabled ||
1871 (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1872 conswitchp = &vga_con;
1873 #elif defined(CONFIG_DUMMY_CONSOLE)
1874 conswitchp = &dummy_con;
1875 #endif
1876 #endif
1877 } else {
1878 #if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
1879 conswitchp = &dummy_con;
1880 #endif
1882 tsc_init();
1885 static int
1886 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1888 HYPERVISOR_shutdown(SHUTDOWN_crash);
1889 /* we're never actually going to get here... */
1890 return NOTIFY_DONE;
1893 static __init int add_pcspkr(void)
1895 struct platform_device *pd;
1896 int ret;
1898 if (!is_initial_xendomain())
1899 return 0;
1901 pd = platform_device_alloc("pcspkr", -1);
1902 if (!pd)
1903 return -ENOMEM;
1905 ret = platform_device_add(pd);
1906 if (ret)
1907 platform_device_put(pd);
1909 return ret;
1911 device_initcall(add_pcspkr);
1913 /*
1914 * Local Variables:
1915 * mode:c
1916 * c-file-style:"k&r"
1917 * c-basic-offset:8
1918 * End:
1919 */