ia64/xen-unstable

view linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c @ 9612:131051c09008

Move misplaced CONFIG_KEXEC code chunk in setup-xen.c to its proper location.

From: Jan Beulich

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 06 15:22:46 2006 +0100 (2006-04-06)
parents 9316fe0c9c4f
children d857c6281410
line source
1 /*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
22 /*
23 * This file handles the architecture-dependent parts of initialization
24 */
26 #include <linux/config.h>
27 #include <linux/sched.h>
28 #include <linux/mm.h>
29 #include <linux/mmzone.h>
30 #include <linux/tty.h>
31 #include <linux/ioport.h>
32 #include <linux/acpi.h>
33 #include <linux/apm_bios.h>
34 #include <linux/initrd.h>
35 #include <linux/bootmem.h>
36 #include <linux/seq_file.h>
37 #include <linux/console.h>
38 #include <linux/mca.h>
39 #include <linux/root_dev.h>
40 #include <linux/highmem.h>
41 #include <linux/module.h>
42 #include <linux/efi.h>
43 #include <linux/init.h>
44 #include <linux/edd.h>
45 #include <linux/nodemask.h>
46 #include <linux/kernel.h>
47 #include <linux/percpu.h>
48 #include <linux/notifier.h>
49 #include <linux/kexec.h>
50 #include <linux/crash_dump.h>
51 #include <linux/dmi.h>
53 #include <video/edid.h>
55 #include <asm/apic.h>
56 #include <asm/e820.h>
57 #include <asm/mpspec.h>
58 #include <asm/setup.h>
59 #include <asm/arch_hooks.h>
60 #include <asm/sections.h>
61 #include <asm/io_apic.h>
62 #include <asm/ist.h>
63 #include <asm/io.h>
64 #include <asm/hypervisor.h>
65 #include <xen/interface/physdev.h>
66 #include <xen/interface/memory.h>
67 #include <xen/features.h>
68 #include "setup_arch_pre.h"
69 #include <bios_ebda.h>
71 /* Forward Declaration. */
72 void __init find_max_pfn(void);
74 /* Allows setting of maximum possible memory size */
75 static unsigned long xen_override_max_pfn;
77 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
78 static struct notifier_block xen_panic_block = {
79 xen_panic_event, NULL, 0 /* try to go last */
80 };
82 extern char hypercall_page[PAGE_SIZE];
83 EXPORT_SYMBOL(hypercall_page);
85 int disable_pse __devinitdata = 0;
87 /*
88 * Machine setup..
89 */
91 #ifdef CONFIG_EFI
92 int efi_enabled = 0;
93 EXPORT_SYMBOL(efi_enabled);
94 #endif
96 /* cpu data as detected by the assembly code in head.S */
97 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
98 /* common cpu data for all cpus */
99 struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
100 EXPORT_SYMBOL(boot_cpu_data);
102 unsigned long mmu_cr4_features;
104 #ifdef CONFIG_ACPI
105 int acpi_disabled = 0;
106 #else
107 int acpi_disabled = 1;
108 #endif
109 EXPORT_SYMBOL(acpi_disabled);
111 #ifdef CONFIG_ACPI
112 int __initdata acpi_force = 0;
113 extern acpi_interrupt_flags acpi_sci_flags;
114 #endif
116 /* for MCA, but anyone else can use it if they want */
117 unsigned int machine_id;
118 #ifdef CONFIG_MCA
119 EXPORT_SYMBOL(machine_id);
120 #endif
121 unsigned int machine_submodel_id;
122 unsigned int BIOS_revision;
123 unsigned int mca_pentium_flag;
125 /* For PCI or other memory-mapped resources */
126 unsigned long pci_mem_start = 0x10000000;
127 #ifdef CONFIG_PCI
128 EXPORT_SYMBOL(pci_mem_start);
129 #endif
131 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
132 int bootloader_type;
134 /* user-defined highmem size */
135 static unsigned int highmem_pages = -1;
137 /*
138 * Setup options
139 */
140 struct drive_info_struct { char dummy[32]; } drive_info;
141 #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \
142 defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
143 EXPORT_SYMBOL(drive_info);
144 #endif
145 struct screen_info screen_info;
146 EXPORT_SYMBOL(screen_info);
147 struct apm_info apm_info;
148 EXPORT_SYMBOL(apm_info);
149 struct sys_desc_table_struct {
150 unsigned short length;
151 unsigned char table[0];
152 };
153 struct edid_info edid_info;
154 EXPORT_SYMBOL_GPL(edid_info);
155 struct ist_info ist_info;
156 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
157 defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
158 EXPORT_SYMBOL(ist_info);
159 #endif
160 struct e820map e820;
162 extern void early_cpu_init(void);
163 extern void generic_apic_probe(char *);
164 extern int root_mountflags;
166 unsigned long saved_videomode;
168 #define RAMDISK_IMAGE_START_MASK 0x07FF
169 #define RAMDISK_PROMPT_FLAG 0x8000
170 #define RAMDISK_LOAD_FLAG 0x4000
172 static char command_line[COMMAND_LINE_SIZE];
174 unsigned char __initdata boot_params[PARAM_SIZE];
176 static struct resource data_resource = {
177 .name = "Kernel data",
178 .start = 0,
179 .end = 0,
180 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
181 };
183 static struct resource code_resource = {
184 .name = "Kernel code",
185 .start = 0,
186 .end = 0,
187 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
188 };
190 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
191 static struct resource system_rom_resource = {
192 .name = "System ROM",
193 .start = 0xf0000,
194 .end = 0xfffff,
195 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
196 };
198 static struct resource extension_rom_resource = {
199 .name = "Extension ROM",
200 .start = 0xe0000,
201 .end = 0xeffff,
202 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
203 };
205 static struct resource adapter_rom_resources[] = { {
206 .name = "Adapter ROM",
207 .start = 0xc8000,
208 .end = 0,
209 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
210 }, {
211 .name = "Adapter ROM",
212 .start = 0,
213 .end = 0,
214 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
215 }, {
216 .name = "Adapter ROM",
217 .start = 0,
218 .end = 0,
219 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
220 }, {
221 .name = "Adapter ROM",
222 .start = 0,
223 .end = 0,
224 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
225 }, {
226 .name = "Adapter ROM",
227 .start = 0,
228 .end = 0,
229 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
230 }, {
231 .name = "Adapter ROM",
232 .start = 0,
233 .end = 0,
234 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
235 } };
237 #define ADAPTER_ROM_RESOURCES \
238 (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
240 static struct resource video_rom_resource = {
241 .name = "Video ROM",
242 .start = 0xc0000,
243 .end = 0xc7fff,
244 .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
245 };
246 #endif
248 static struct resource video_ram_resource = {
249 .name = "Video RAM area",
250 .start = 0xa0000,
251 .end = 0xbffff,
252 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
253 };
255 static struct resource standard_io_resources[] = { {
256 .name = "dma1",
257 .start = 0x0000,
258 .end = 0x001f,
259 .flags = IORESOURCE_BUSY | IORESOURCE_IO
260 }, {
261 .name = "pic1",
262 .start = 0x0020,
263 .end = 0x0021,
264 .flags = IORESOURCE_BUSY | IORESOURCE_IO
265 }, {
266 .name = "timer0",
267 .start = 0x0040,
268 .end = 0x0043,
269 .flags = IORESOURCE_BUSY | IORESOURCE_IO
270 }, {
271 .name = "timer1",
272 .start = 0x0050,
273 .end = 0x0053,
274 .flags = IORESOURCE_BUSY | IORESOURCE_IO
275 }, {
276 .name = "keyboard",
277 .start = 0x0060,
278 .end = 0x006f,
279 .flags = IORESOURCE_BUSY | IORESOURCE_IO
280 }, {
281 .name = "dma page reg",
282 .start = 0x0080,
283 .end = 0x008f,
284 .flags = IORESOURCE_BUSY | IORESOURCE_IO
285 }, {
286 .name = "pic2",
287 .start = 0x00a0,
288 .end = 0x00a1,
289 .flags = IORESOURCE_BUSY | IORESOURCE_IO
290 }, {
291 .name = "dma2",
292 .start = 0x00c0,
293 .end = 0x00df,
294 .flags = IORESOURCE_BUSY | IORESOURCE_IO
295 }, {
296 .name = "fpu",
297 .start = 0x00f0,
298 .end = 0x00ff,
299 .flags = IORESOURCE_BUSY | IORESOURCE_IO
300 } };
302 #define STANDARD_IO_RESOURCES \
303 (sizeof standard_io_resources / sizeof standard_io_resources[0])
305 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
306 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
308 static int __init romchecksum(unsigned char *rom, unsigned long length)
309 {
310 unsigned char *p, sum = 0;
312 for (p = rom; p < rom + length; p++)
313 sum += *p;
314 return sum == 0;
315 }
/*
 * probe_roms() - scan the legacy ISA ROM address space and register the
 * regions that hold valid ROM images (video ROM, system ROM, extension
 * ROM, adapter ROMs) in the iomem resource tree.
 * A ROM is recognised by its 0xaa55 signature (romsignature()); its
 * length byte (rom[2], units of 512 bytes) is only trusted when the
 * whole image checksums to zero (romchecksum()).
 */
317 static void __init probe_roms(void)
318 {
319 unsigned long start, length, upper;
320 unsigned char *rom;
321 int i;
/* Only dom0 sees the real machine's ROM space; other domains bail out. */
323 /* Nothing to do if not running in dom0. */
324 if (!(xen_start_info->flags & SIF_INITDOMAIN))
325 return;
327 /* video rom */
328 upper = adapter_rom_resources[0].start;
329 for (start = video_rom_resource.start; start < upper; start += 2048) {
330 rom = isa_bus_to_virt(start);
331 if (!romsignature(rom))
332 continue;
334 video_rom_resource.start = start;
336 /* 0 < length <= 0x7f * 512, historically */
337 length = rom[2] * 512;
339 /* if checksum okay, trust length byte */
340 if (length && romchecksum(rom, length))
341 video_rom_resource.end = start + length - 1;
/* Register the video ROM even when the length byte failed its checksum
   (then the static default .end = 0xc7fff is kept). */
343 request_resource(&iomem_resource, &video_rom_resource);
344 break;
345 }
/* Resume the adapter-ROM scan on the next 2K boundary past the video ROM,
   but never below the start of the adapter ROM window. */
347 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
348 if (start < upper)
349 start = upper;
351 /* system rom */
352 request_resource(&iomem_resource, &system_rom_resource);
353 upper = system_rom_resource.start;
355 /* check for extension rom (ignore length byte!) */
356 rom = isa_bus_to_virt(extension_rom_resource.start);
357 if (romsignature(rom)) {
358 length = extension_rom_resource.end - extension_rom_resource.start + 1;
359 if (romchecksum(rom, length)) {
360 request_resource(&iomem_resource, &extension_rom_resource);
/* A valid extension ROM caps the adapter-ROM scan below itself. */
361 upper = extension_rom_resource.start;
362 }
363 }
365 /* check for adapter roms on 2k boundaries */
366 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
367 rom = isa_bus_to_virt(start);
368 if (!romsignature(rom))
369 continue;
371 /* 0 < length <= 0x7f * 512, historically */
372 length = rom[2] * 512;
374 /* but accept any length that fits if checksum okay */
375 if (!length || start + length > upper || !romchecksum(rom, length))
376 continue;
378 adapter_rom_resources[i].start = start;
379 adapter_rom_resources[i].end = start + length - 1;
380 request_resource(&iomem_resource, &adapter_rom_resources[i]);
/* Continue scanning just past this ROM, rounded down to a 2K boundary
   (the loop's `start += 2048` then advances to the next boundary). */
382 start = adapter_rom_resources[i++].end & ~2047UL;
383 }
384 }
385 #endif
387 /*
388 * Point at the empty zero page to start with. We map the real shared_info
389 * page as soon as fixmap is up and running.
390 */
391 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
392 EXPORT_SYMBOL(HYPERVISOR_shared_info);
394 unsigned long *phys_to_machine_mapping;
395 unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
396 EXPORT_SYMBOL(phys_to_machine_mapping);
398 /* Raw start-of-day parameters from the hypervisor. */
399 start_info_t *xen_start_info;
400 EXPORT_SYMBOL(xen_start_info);
/*
 * limit_regions() - truncate the firmware memory map so that no usable
 * RAM extends beyond `size` bytes of physical address space (used by the
 * mem=/memmap= command-line handling).  When EFI is enabled the EFI
 * memory map is trimmed; otherwise the e820 map is trimmed in place.
 */
402 static void __init limit_regions(unsigned long long size)
403 {
404 unsigned long long current_addr = 0;
405 int i;
407 if (efi_enabled) {
408 efi_memory_desc_t *md;
409 void *p;
/* Walk raw EFI descriptors; desc_size may differ from sizeof(*md). */
411 for (p = memmap.map, i = 0; p < memmap.map_end;
412 p += memmap.desc_size, i++) {
413 md = p;
/* End address of this descriptor; EFI pages are 4K, hence << 12. */
414 current_addr = md->phys_addr + (md->num_pages << 12);
415 if (md->type == EFI_CONVENTIONAL_MEMORY) {
416 if (current_addr >= size) {
/* Shrink this descriptor so it ends at `size`, then drop everything
   after it by capping nr_map. */
417 md->num_pages -=
418 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
419 memmap.nr_map = i + 1;
420 return;
421 }
422 }
423 }
424 }
/* e820 path: find the first RAM entry crossing the limit and cut there. */
425 for (i = 0; i < e820.nr_map; i++) {
426 current_addr = e820.map[i].addr + e820.map[i].size;
427 if (current_addr < size)
428 continue;
/* Non-RAM entries past the limit are deliberately left alone. */
430 if (e820.map[i].type != E820_RAM)
431 continue;
433 if (e820.map[i].addr >= size) {
434 /*
435 * This region starts past the end of the
436 * requested size, skip it completely.
437 */
438 e820.nr_map = i;
439 } else {
440 e820.nr_map = i + 1;
441 e820.map[i].size -= current_addr - size;
442 }
443 return;
444 }
445 }
447 static void __init add_memory_region(unsigned long long start,
448 unsigned long long size, int type)
449 {
450 int x;
452 if (!efi_enabled) {
453 x = e820.nr_map;
455 if (x == E820MAX) {
456 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
457 return;
458 }
460 e820.map[x].addr = start;
461 e820.map[x].size = size;
462 e820.map[x].type = type;
463 e820.nr_map++;
464 }
465 } /* add_memory_region */
467 #define E820_DEBUG 1
/*
 * print_memory_map() - dump every e820 entry to the console as
 * "<who>: <start> - <end> (<type>)".  `who` labels the origin of the
 * map (for instance "user" after a user-defined override).
 */
469 static void __init print_memory_map(char *who)
470 {
471 int i;
473 for (i = 0; i < e820.nr_map; i++) {
/* Note: the printed end address is exclusive (addr + size). */
474 printk(" %s: %016Lx - %016Lx ", who,
475 e820.map[i].addr,
476 e820.map[i].addr + e820.map[i].size);
477 switch (e820.map[i].type) {
478 case E820_RAM: printk("(usable)\n");
479 break;
480 case E820_RESERVED:
481 printk("(reserved)\n");
482 break;
483 case E820_ACPI:
484 printk("(ACPI data)\n");
485 break;
486 case E820_NVS:
487 printk("(ACPI NVS)\n");
488 break;
/* NOTE(review): %lu assumes e820entry.type is unsigned long -- matches
   the 2.6-era asm-i386/e820.h definition; confirm against the header. */
489 default: printk("type %lu\n", e820.map[i].type);
490 break;
491 }
492 }
493 }
495 #if 0
496 /*
497 * Sanitize the BIOS e820 map.
498 *
499 * Some e820 responses include overlapping entries. The following
500 * replaces the original e820 map with a new one, removing overlaps.
501 *
502 */
503 struct change_member {
504 struct e820entry *pbios; /* pointer to original bios entry */
505 unsigned long long addr; /* address for this change point */
506 };
507 static struct change_member change_point_list[2*E820MAX] __initdata;
508 static struct change_member *change_point[2*E820MAX] __initdata;
509 static struct e820entry *overlap_list[E820MAX] __initdata;
510 static struct e820entry new_bios[E820MAX] __initdata;
/*
 * sanitize_e820_map() - rebuild the BIOS e820 map with overlaps removed,
 * using a change-point sweep: every region start and end becomes a
 * change point, the points are sorted by address, and a new map is
 * emitted segment by segment.  Where regions overlap, the numerically
 * largest type wins (1 = usable RAM, higher = unusable/reserved).
 * Returns 0 on success, -1 when the input has fewer than 2 entries or
 * an entry whose addr+size wraps around 64 bits.
 * (Compiled out in this Xen tree: the function sits inside an #if 0.)
 */
512 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
513 {
514 struct change_member *change_tmp;
515 unsigned long current_type, last_type;
516 unsigned long long last_addr;
517 int chgidx, still_changing;
518 int overlap_entries;
519 int new_bios_entry;
520 int old_nr, new_nr, chg_nr;
521 int i;
523 /*
524 Visually we're performing the following (1,2,3,4 = memory types)...
526 Sample memory map (w/overlaps):
527 ____22__________________
528 ______________________4_
529 ____1111________________
530 _44_____________________
531 11111111________________
532 ____________________33__
533 ___________44___________
534 __________33333_________
535 ______________22________
536 ___________________2222_
537 _________111111111______
538 _____________________11_
539 _________________4______
541 Sanitized equivalent (no overlap):
542 1_______________________
543 _44_____________________
544 ___1____________________
545 ____22__________________
546 ______11________________
547 _________1______________
548 __________3_____________
549 ___________44___________
550 _____________33_________
551 _______________2________
552 ________________1_______
553 _________________4______
554 ___________________2____
555 ____________________33__
556 ______________________4_
557 */
559 /* if there's only one memory region, don't bother */
560 if (*pnr_map < 2)
561 return -1;
563 old_nr = *pnr_map;
565 /* bail out if we find any unreasonable addresses in bios map */
566 for (i=0; i<old_nr; i++)
567 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
568 return -1;
570 /* create pointers for initial change-point information (for sorting) */
571 for (i=0; i < 2*old_nr; i++)
572 change_point[i] = &change_point_list[i];
574 /* record all known change-points (starting and ending addresses),
575 omitting those that are for empty memory regions */
576 chgidx = 0;
577 for (i=0; i < old_nr; i++) {
578 if (biosmap[i].size != 0) {
579 change_point[chgidx]->addr = biosmap[i].addr;
580 change_point[chgidx++]->pbios = &biosmap[i];
581 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
582 change_point[chgidx++]->pbios = &biosmap[i];
583 }
584 }
585 chg_nr = chgidx; /* true number of change-points */
/* Bubble sort; ties are broken so that a region's start sorts after the
   previous region's end at the same address. */
587 /* sort change-point list by memory addresses (low -> high) */
588 still_changing = 1;
589 while (still_changing) {
590 still_changing = 0;
591 for (i=1; i < chg_nr; i++) {
592 /* if <current_addr> > <last_addr>, swap */
593 /* or, if current=<start_addr> & last=<end_addr>, swap */
594 if ((change_point[i]->addr < change_point[i-1]->addr) ||
595 ((change_point[i]->addr == change_point[i-1]->addr) &&
596 (change_point[i]->addr == change_point[i]->pbios->addr) &&
597 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
598 )
599 {
600 change_tmp = change_point[i];
601 change_point[i] = change_point[i-1];
602 change_point[i-1] = change_tmp;
603 still_changing=1;
604 }
605 }
606 }
608 /* create a new bios memory map, removing overlaps */
609 overlap_entries=0; /* number of entries in the overlap table */
610 new_bios_entry=0; /* index for creating new bios map entries */
611 last_type = 0; /* start with undefined memory type */
612 last_addr = 0; /* start with 0 as last starting address */
613 /* loop through change-points, determining affect on the new bios map */
614 for (chgidx=0; chgidx < chg_nr; chgidx++)
615 {
616 /* keep track of all overlapping bios entries */
617 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
618 {
619 /* add map entry to overlap list (> 1 entry implies an overlap) */
620 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
621 }
622 else
623 {
624 /* remove entry from list (order independent, so swap with last) */
625 for (i=0; i<overlap_entries; i++)
626 {
627 if (overlap_list[i] == change_point[chgidx]->pbios)
628 overlap_list[i] = overlap_list[overlap_entries-1];
629 }
630 overlap_entries--;
631 }
632 /* if there are overlapping entries, decide which "type" to use */
633 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
634 current_type = 0;
635 for (i=0; i<overlap_entries; i++)
636 if (overlap_list[i]->type > current_type)
637 current_type = overlap_list[i]->type;
638 /* continue building up new bios map based on this information */
639 if (current_type != last_type) {
640 if (last_type != 0) {
641 new_bios[new_bios_entry].size =
642 change_point[chgidx]->addr - last_addr;
643 /* move forward only if the new size was non-zero */
644 if (new_bios[new_bios_entry].size != 0)
645 if (++new_bios_entry >= E820MAX)
646 break; /* no more space left for new bios entries */
647 }
648 if (current_type != 0) {
649 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
650 new_bios[new_bios_entry].type = current_type;
651 last_addr=change_point[chgidx]->addr;
652 }
653 last_type = current_type;
654 }
655 }
656 new_nr = new_bios_entry; /* retain count for new bios entries */
658 /* copy new bios mapping into original location */
659 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
660 *pnr_map = new_nr;
662 return 0;
663 }
665 /*
666 * Copy the BIOS e820 map into a safe place.
667 *
668 * Sanity-check it while we're at it..
669 *
670 * If we're lucky and live on a modern system, the setup code
671 * will have given us a memory map that we can use to properly
672 * set up memory. If we aren't, we'll fake a memory map.
673 *
674 * We check to see that the memory map contains at least 2 elements
675 * before we'll use it, because the detection code in setup.S may
676 * not be perfect and most every PC known to man has two memory
677 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
678 * thinkpad 560x, for example, does not cooperate with the memory
679 * detection code.)
680 */
/*
 * copy_e820_map() - validate the BIOS-provided e820 entries and feed them
 * into the kernel's map via add_memory_region(), clipping any RAM entry
 * that wrongly claims the 640k-1M hole.  Returns 0 on success, -1 when
 * the map is implausible (fewer than 2 entries, or addr+size overflow).
 * (Compiled out in this Xen tree: the function sits inside an #if 0.)
 */
681 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
682 {
683 /* Only one memory region (or negative)? Ignore it */
684 if (nr_map < 2)
685 return -1;
687 do {
688 unsigned long long start = biosmap->addr;
689 unsigned long long size = biosmap->size;
690 unsigned long long end = start + size;
691 unsigned long type = biosmap->type;
693 /* Overflow in 64 bits? Ignore the memory map. */
694 if (start > end)
695 return -1;
697 /*
698 * Some BIOSes claim RAM in the 640k - 1M region.
699 * Not right. Fix it up.
700 */
701 if (type == E820_RAM) {
702 if (start < 0x100000ULL && end > 0xA0000ULL) {
/* Keep any usable chunk below 640k as its own entry... */
703 if (start < 0xA0000ULL)
704 add_memory_region(start, 0xA0000ULL-start, type);
705 if (end <= 0x100000ULL)
706 continue;
/* ...and re-anchor whatever remains at the 1MB boundary. */
707 start = 0x100000ULL;
708 size = end - start;
709 }
710 }
711 add_memory_region(start, size, type);
712 } while (biosmap++,--nr_map);
713 return 0;
714 }
715 #endif
717 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
718 struct edd edd;
719 #ifdef CONFIG_EDD_MODULE
720 EXPORT_SYMBOL(edd);
721 #endif
722 /**
723 * copy_edd() - Copy the BIOS EDD information
724 * from boot_params into a safe place.
725 *
726 */
727 static inline void copy_edd(void)
728 {
/* Copy the BIOS EDD MBR signatures and disk info records (gathered by
   the boot code -- see the kerneldoc above) into the global edd struct,
   along with their element counts. */
729 memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
730 memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
731 edd.mbr_signature_nr = EDD_MBR_SIG_NR;
732 edd.edd_info_nr = EDD_NR;
733 }
734 #else
/* EDD support not configured: copy_edd() becomes a no-op. */
735 static inline void copy_edd(void)
736 {
737 }
738 #endif
740 /*
741 * Do NOT EVER look at the BIOS memory size location.
742 * It does not work on many machines.
743 */
744 #define LOWMEMSIZE() (0x9f000)
746 static void __init parse_cmdline_early (char ** cmdline_p)
747 {
748 char c = ' ', *to = command_line, *from = saved_command_line;
749 int len = 0, max_cmdline;
750 int userdef = 0;
752 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
753 max_cmdline = COMMAND_LINE_SIZE;
754 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
755 /* Save unparsed command line copy for /proc/cmdline */
756 saved_command_line[max_cmdline-1] = '\0';
758 for (;;) {
759 if (c != ' ')
760 goto next_char;
761 /*
762 * "mem=nopentium" disables the 4MB page tables.
763 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
764 * to <mem>, overriding the bios size.
765 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
766 * <start> to <start>+<mem>, overriding the bios size.
767 *
768 * HPA tells me bootloaders need to parse mem=, so no new
769 * option should be mem= [also see Documentation/i386/boot.txt]
770 */
771 if (!memcmp(from, "mem=", 4)) {
772 if (to != command_line)
773 to--;
774 if (!memcmp(from+4, "nopentium", 9)) {
775 from += 9+4;
776 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
777 disable_pse = 1;
778 } else {
779 /* If the user specifies memory size, we
780 * limit the BIOS-provided memory map to
781 * that size. exactmap can be used to specify
782 * the exact map. mem=number can be used to
783 * trim the existing memory map.
784 */
785 unsigned long long mem_size;
787 mem_size = memparse(from+4, &from);
788 #if 0
789 limit_regions(mem_size);
790 userdef=1;
791 #else
792 xen_override_max_pfn =
793 (unsigned long)(mem_size>>PAGE_SHIFT);
794 #endif
795 }
796 }
798 else if (!memcmp(from, "memmap=", 7)) {
799 if (to != command_line)
800 to--;
801 if (!memcmp(from+7, "exactmap", 8)) {
802 #ifdef CONFIG_CRASH_DUMP
803 /* If we are doing a crash dump, we
804 * still need to know the real mem
805 * size before original memory map is
806 * reset.
807 */
808 find_max_pfn();
809 saved_max_pfn = max_pfn;
810 #endif
811 from += 8+7;
812 e820.nr_map = 0;
813 userdef = 1;
814 } else {
815 /* If the user specifies memory size, we
816 * limit the BIOS-provided memory map to
817 * that size. exactmap can be used to specify
818 * the exact map. mem=number can be used to
819 * trim the existing memory map.
820 */
821 unsigned long long start_at, mem_size;
823 mem_size = memparse(from+7, &from);
824 if (*from == '@') {
825 start_at = memparse(from+1, &from);
826 add_memory_region(start_at, mem_size, E820_RAM);
827 } else if (*from == '#') {
828 start_at = memparse(from+1, &from);
829 add_memory_region(start_at, mem_size, E820_ACPI);
830 } else if (*from == '$') {
831 start_at = memparse(from+1, &from);
832 add_memory_region(start_at, mem_size, E820_RESERVED);
833 } else {
834 limit_regions(mem_size);
835 userdef=1;
836 }
837 }
838 }
840 else if (!memcmp(from, "noexec=", 7))
841 noexec_setup(from + 7);
844 #ifdef CONFIG_X86_MPPARSE
845 /*
846 * If the BIOS enumerates physical processors before logical,
847 * maxcpus=N at enumeration-time can be used to disable HT.
848 */
849 else if (!memcmp(from, "maxcpus=", 8)) {
850 extern unsigned int maxcpus;
852 maxcpus = simple_strtoul(from + 8, NULL, 0);
853 }
854 #endif
856 #ifdef CONFIG_ACPI
857 /* "acpi=off" disables both ACPI table parsing and interpreter */
858 else if (!memcmp(from, "acpi=off", 8)) {
859 disable_acpi();
860 }
862 /* acpi=force to over-ride black-list */
863 else if (!memcmp(from, "acpi=force", 10)) {
864 acpi_force = 1;
865 acpi_ht = 1;
866 acpi_disabled = 0;
867 }
869 /* acpi=strict disables out-of-spec workarounds */
870 else if (!memcmp(from, "acpi=strict", 11)) {
871 acpi_strict = 1;
872 }
874 /* Limit ACPI just to boot-time to enable HT */
875 else if (!memcmp(from, "acpi=ht", 7)) {
876 if (!acpi_force)
877 disable_acpi();
878 acpi_ht = 1;
879 }
881 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
882 else if (!memcmp(from, "pci=noacpi", 10)) {
883 acpi_disable_pci();
884 }
885 /* "acpi=noirq" disables ACPI interrupt routing */
886 else if (!memcmp(from, "acpi=noirq", 10)) {
887 acpi_noirq_set();
888 }
890 else if (!memcmp(from, "acpi_sci=edge", 13))
891 acpi_sci_flags.trigger = 1;
893 else if (!memcmp(from, "acpi_sci=level", 14))
894 acpi_sci_flags.trigger = 3;
896 else if (!memcmp(from, "acpi_sci=high", 13))
897 acpi_sci_flags.polarity = 1;
899 else if (!memcmp(from, "acpi_sci=low", 12))
900 acpi_sci_flags.polarity = 3;
902 #ifdef CONFIG_X86_IO_APIC
903 else if (!memcmp(from, "acpi_skip_timer_override", 24))
904 acpi_skip_timer_override = 1;
906 if (!memcmp(from, "disable_timer_pin_1", 19))
907 disable_timer_pin_1 = 1;
908 if (!memcmp(from, "enable_timer_pin_1", 18))
909 disable_timer_pin_1 = -1;
911 /* disable IO-APIC */
912 else if (!memcmp(from, "noapic", 6))
913 disable_ioapic_setup();
914 #endif /* CONFIG_X86_IO_APIC */
915 #endif /* CONFIG_ACPI */
917 #ifdef CONFIG_X86_LOCAL_APIC
918 /* enable local APIC */
919 else if (!memcmp(from, "lapic", 5))
920 lapic_enable();
922 /* disable local APIC */
923 else if (!memcmp(from, "nolapic", 6))
924 lapic_disable();
925 #endif /* CONFIG_X86_LOCAL_APIC */
927 #ifdef CONFIG_KEXEC
928 /* crashkernel=size@addr specifies the location to reserve for
929 * a crash kernel. By reserving this memory we guarantee
930 * that linux never set's it up as a DMA target.
931 * Useful for holding code to do something appropriate
932 * after a kernel panic.
933 */
934 else if (!memcmp(from, "crashkernel=", 12)) {
935 unsigned long size, base;
936 size = memparse(from+12, &from);
937 if (*from == '@') {
938 base = memparse(from+1, &from);
939 /* FIXME: Do I want a sanity check
940 * to validate the memory range?
941 */
942 crashk_res.start = base;
943 crashk_res.end = base + size - 1;
944 }
945 }
946 #endif
947 #ifdef CONFIG_PROC_VMCORE
948 /* elfcorehdr= specifies the location of elf core header
949 * stored by the crashed kernel.
950 */
951 else if (!memcmp(from, "elfcorehdr=", 11))
952 elfcorehdr_addr = memparse(from+11, &from);
953 #endif
955 /*
956 * highmem=size forces highmem to be exactly 'size' bytes.
957 * This works even on boxes that have no highmem otherwise.
958 * This also works to reduce highmem size on bigger boxes.
959 */
960 else if (!memcmp(from, "highmem=", 8))
961 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
963 /*
964 * vmalloc=size forces the vmalloc area to be exactly 'size'
965 * bytes. This can be used to increase (or decrease) the
966 * vmalloc area - the default is 128m.
967 */
968 else if (!memcmp(from, "vmalloc=", 8))
969 __VMALLOC_RESERVE = memparse(from+8, &from);
971 next_char:
972 c = *(from++);
973 if (!c)
974 break;
975 if (COMMAND_LINE_SIZE <= ++len)
976 break;
977 *(to++) = c;
978 }
979 *to = '\0';
980 *cmdline_p = command_line;
981 if (userdef) {
982 printk(KERN_INFO "user-defined physical RAM map:\n");
983 print_memory_map("user");
984 }
985 }
987 #if 0 /* !XEN */
988 /*
989 * Callback for efi_memory_walk.
990 */
991 static int __init
992 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
993 {
994 unsigned long *max_pfn = arg, pfn;
996 if (start < end) {
997 pfn = PFN_UP(end -1);
998 if (pfn > *max_pfn)
999 *max_pfn = pfn;
1001 return 0;
1004 static int __init
1005 efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
1007 memory_present(0, start, end);
1008 return 0;
1011 /*
1012 * Find the highest page frame number we have available
1013 */
1014 void __init find_max_pfn(void)
1016 int i;
1018 max_pfn = 0;
1019 if (efi_enabled) {
1020 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
1021 efi_memmap_walk(efi_memory_present_wrapper, NULL);
1022 return;
1025 for (i = 0; i < e820.nr_map; i++) {
1026 unsigned long start, end;
1027 /* RAM? */
1028 if (e820.map[i].type != E820_RAM)
1029 continue;
1030 start = PFN_UP(e820.map[i].addr);
1031 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1032 if (start >= end)
1033 continue;
1034 if (end > max_pfn)
1035 max_pfn = end;
1036 memory_present(0, start, end);
1039 #else
1040 /* We don't use the fake e820 because we need to respond to user override. */
1041 void __init find_max_pfn(void)
1043 if (xen_override_max_pfn == 0) {
1044 max_pfn = xen_start_info->nr_pages;
1045 /* Default 8MB slack (to balance backend allocations). */
1046 max_pfn += 8 << (20 - PAGE_SHIFT);
1047 } else if (xen_override_max_pfn > xen_start_info->nr_pages) {
1048 max_pfn = xen_override_max_pfn;
1049 } else {
1050 max_pfn = xen_start_info->nr_pages;
1053 #endif /* XEN */
1055 /*
1056 * Determine low and high memory ranges:
1057 */
1058 unsigned long __init find_max_low_pfn(void)
1060 unsigned long max_low_pfn;
1062 max_low_pfn = max_pfn;
1063 if (max_low_pfn > MAXMEM_PFN) {
1064 if (highmem_pages == -1)
1065 highmem_pages = max_pfn - MAXMEM_PFN;
1066 if (highmem_pages + MAXMEM_PFN < max_pfn)
1067 max_pfn = MAXMEM_PFN + highmem_pages;
1068 if (highmem_pages + MAXMEM_PFN > max_pfn) {
1069 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
1070 highmem_pages = 0;
1072 max_low_pfn = MAXMEM_PFN;
1073 #ifndef CONFIG_HIGHMEM
1074 /* Maximum memory usable is what is directly addressable */
1075 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
1076 MAXMEM>>20);
1077 if (max_pfn > MAX_NONPAE_PFN)
1078 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1079 else
1080 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
1081 max_pfn = MAXMEM_PFN;
1082 #else /* !CONFIG_HIGHMEM */
1083 #ifndef CONFIG_X86_PAE
1084 if (max_pfn > MAX_NONPAE_PFN) {
1085 max_pfn = MAX_NONPAE_PFN;
1086 printk(KERN_WARNING "Warning only 4GB will be used.\n");
1087 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
1089 #endif /* !CONFIG_X86_PAE */
1090 #endif /* !CONFIG_HIGHMEM */
1091 } else {
1092 if (highmem_pages == -1)
1093 highmem_pages = 0;
1094 #ifdef CONFIG_HIGHMEM
1095 if (highmem_pages >= max_pfn) {
1096 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
1097 highmem_pages = 0;
1099 if (highmem_pages) {
1100 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
1101 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
1102 highmem_pages = 0;
1104 max_low_pfn -= highmem_pages;
1106 #else
1107 if (highmem_pages)
1108 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
1109 #endif
1111 return max_low_pfn;
1114 /*
1115 * Free all available memory for boot time allocation. Used
1116 * as a callback function by efi_memory_walk()
1117 *
1118 * @start, @end: physical byte range of the memory descriptor
1119 * @arg:         unused callback cookie
1120 * Ranges are clipped to the low-memory limit (max_low_pfn) before
1121 * being handed to the bootmem allocator.  Always returns 0 so the
1122 * walk continues over all descriptors.
1117 */
1119 static int __init
1120 free_available_memory(unsigned long start, unsigned long end, void *arg)
1122 /* check max_low_pfn */
1123 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1124 return 0;
1125 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1126 end = (max_low_pfn + 1) << PAGE_SHIFT;
/* Only free a non-empty clipped range. */
1127 if (start < end)
1128 free_bootmem(start, end - start);
1130 return 0;
1132 /*
1133 * Register fully available low RAM pages with the bootmem allocator.
1134 *
1135 * On EFI systems this delegates to the firmware memory map walk;
1136 * otherwise it walks the e820 map, rounding each usable E820_RAM
1137 * region inward to whole pages and clipping it at @max_low_pfn.
1134 */
1135 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1137 int i;
1139 if (efi_enabled) {
1140 efi_memmap_walk(free_available_memory, NULL);
1141 return;
1143 for (i = 0; i < e820.nr_map; i++) {
1144 unsigned long curr_pfn, last_pfn, size;
1145 /*
1146 * Reserve usable low memory
1147 */
1148 if (e820.map[i].type != E820_RAM)
1149 continue;
1150 /*
1151 * We are rounding up the start address of usable memory:
1152 */
1153 curr_pfn = PFN_UP(e820.map[i].addr);
1154 if (curr_pfn >= max_low_pfn)
1155 continue;
1156 /*
1157 * ... and at the end of the usable range downwards:
1158 */
1159 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1161 if (last_pfn > max_low_pfn)
1162 last_pfn = max_low_pfn;
1164 /*
1165 * .. finally, did all the rounding and playing
1166 * around just make the area go away?
1167 */
1168 if (last_pfn <= curr_pfn)
1169 continue;
1171 size = last_pfn - curr_pfn;
1172 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1176 #ifndef CONFIG_XEN
1177 /*
1178 * workaround for Dell systems that neglect to reserve EBDA
1179 * (Extended BIOS Data Area).  Reserve one page at the address
1180 * reported by the BIOS so bootmem never hands it out; a zero
1181 * address means no EBDA was reported.  Native (non-Xen) only.
1179 */
1180 static void __init reserve_ebda_region(void)
1182 unsigned int addr;
1183 addr = get_bios_ebda();
1184 if (addr)
1185 reserve_bootmem(addr, PAGE_SIZE);
1187 #endif
1189 #ifndef CONFIG_NEED_MULTIPLE_NODES
1190 void __init setup_bootmem_allocator(void);
/*
 * setup_memory - flat (non-NUMA) memory setup.
 *
 * Computes min_low_pfn (first PFN past the Xen-provided page tables),
 * total RAM (find_max_pfn), the low/high split (find_max_low_pfn),
 * then initializes the bootmem allocator.  Returns max_low_pfn.
 */
1191 static unsigned long __init setup_memory(void)
1193 /*
1194 * partially used pages are not usable - thus
1195 * we are rounding upwards:
1196 * XEN: the initial page tables built by the domain builder sit
1197 * right after the kernel image; memory below them is off limits.
1196 */
1197 min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
1198 xen_start_info->nr_pt_frames;
1200 find_max_pfn();
1202 max_low_pfn = find_max_low_pfn();
1204 #ifdef CONFIG_HIGHMEM
1205 highstart_pfn = highend_pfn = max_pfn;
1206 if (max_pfn > max_low_pfn) {
1207 highstart_pfn = max_low_pfn;
1209 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1210 pages_to_mb(highend_pfn - highstart_pfn));
1211 #endif
1212 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1213 pages_to_mb(max_low_pfn));
1215 setup_bootmem_allocator();
1217 return max_low_pfn;
/*
 * zone_sizes_init - size the page-allocator zones (flat/non-NUMA case).
 *
 * Under Xen all lowmem is placed in ZONE_DMA (see comment below), so
 * the "else" branch that would split DMA/NORMAL never triggers here
 * (low == max_dma).  HIGHMEM, when configured, covers the remainder.
 */
1220 void __init zone_sizes_init(void)
1222 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1223 unsigned int max_dma, low;
1225 /*
1226 * XEN: Our notion of "DMA memory" is fake when running over Xen.
1227 * We simply put all RAM in the DMA zone so that those drivers which
1228 * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
1229 * Those drivers that *do* require lowmem are screwed anyway when
1230 * running over Xen!
1231 */
1232 max_dma = max_low_pfn;
1233 low = max_low_pfn;
/* Retained native-kernel shape: with max_dma == low only the first
 * branch can be taken, putting everything in ZONE_DMA. */
1235 if (low < max_dma)
1236 zones_size[ZONE_DMA] = low;
1237 else {
1238 zones_size[ZONE_DMA] = max_dma;
1239 zones_size[ZONE_NORMAL] = low - max_dma;
1240 #ifdef CONFIG_HIGHMEM
1241 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1242 #endif
1244 free_area_init(zones_size);
1246 #else
1247 extern unsigned long __init setup_memory(void);
1248 extern void zone_sizes_init(void);
1249 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
/*
 * setup_bootmem_allocator - bring up the boot-time page allocator and
 * carve out every region that must never be handed to it: the kernel
 * image + bootmem bitmap, BIOS/firmware pages (native only), the
 * initrd, the kexec crash kernel, and (Xen) locate the initial P->M
 * translation table supplied by the domain builder.
 */
1251 void __init setup_bootmem_allocator(void)
1253 unsigned long bootmap_size;
1254 /*
1255 * Initialize the boot-time allocator (with low memory only):
1256 */
1257 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
1259 register_bootmem_low_pages(max_low_pfn);
1261 /*
1262 * Reserve the bootmem bitmap itself as well. We do this in two
1263 * steps (first step was init_bootmem()) because this catches
1264 * the (very unlikely) case of us accidentally initializing the
1265 * bootmem allocator with an invalid RAM area.
1266 */
1267 reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
1268 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
1270 #ifndef CONFIG_XEN
1271 /*
1272 * reserve physical page 0 - it's a special BIOS page on many boxes,
1273 * enabling clean reboots, SMP operation, laptop functions.
1274 */
1275 reserve_bootmem(0, PAGE_SIZE);
1277 /* reserve EBDA region, it's a 4K region */
1278 reserve_ebda_region();
1280 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1281 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1282 unless you have no PS/2 mouse plugged in. */
1283 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1284 boot_cpu_data.x86 == 6)
1285 reserve_bootmem(0xa0000 - 4096, 4096);
1287 #ifdef CONFIG_SMP
1288 /*
1289 * But first pinch a few for the stack/trampoline stuff
1290 * FIXME: Don't need the extra page at 4K, but need to fix
1291 * trampoline before removing it. (see the GDT stuff)
1292 */
1293 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1294 #endif
1295 #ifdef CONFIG_ACPI_SLEEP
1296 /*
1297 * Reserve low memory region for sleep support.
1298 */
1299 acpi_reserve_bootmem();
1300 #endif
1301 #endif /* !CONFIG_XEN */
1303 #ifdef CONFIG_BLK_DEV_INITRD
/* XEN: the initrd location comes from the start_info module fields,
 * not from the bootloader header. */
1304 if (xen_start_info->mod_start) {
1305 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1306 /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1307 initrd_start = INITRD_START + PAGE_OFFSET;
1308 initrd_end = initrd_start+INITRD_SIZE;
1309 initrd_below_start_ok = 1;
1311 else {
1312 printk(KERN_ERR "initrd extends beyond end of memory "
1313 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1314 INITRD_START + INITRD_SIZE,
1315 max_low_pfn << PAGE_SHIFT);
1316 initrd_start = 0;
1319 #endif
1320 #ifdef CONFIG_KEXEC
/* Keep the crash-kernel region out of the general pool. */
1321 if (crashk_res.start != crashk_res.end)
1322 reserve_bootmem(crashk_res.start,
1323 crashk_res.end - crashk_res.start + 1);
1324 #endif
/* Provisional P->M table from the domain builder; setup_arch() later
 * re-allocates a full-size copy (see setup_arch below). */
1326 if (!xen_feature(XENFEAT_auto_translated_physmap))
1327 phys_to_machine_mapping =
1328 (unsigned long *)xen_start_info->mfn_list;
1331 /*
1332 * The node 0 pgdat is initialized before all of these because
1333 * it's needed for bootmem. node>0 pgdats have their virtual
1334 * space allocated before the pagetables are in place to access
1335 * them, so they can't be cleared then.
1337 * This should all compile down to nothing when NUMA is off.
1338 */
1339 void __init remapped_pgdat_init(void)
1341 int nid;
/* Zero every remapped node descriptor except node 0 (already live). */
1343 for_each_online_node(nid) {
1344 if (nid != 0)
1345 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1349 /*
1350 * Request address space for all standard RAM and ROM resources
1351 * and also for regions reported as reserved by the e820.
1352 *
1353 * XEN: dom0 cannot see the machine e820 directly; it fetches the
1354 * physical memory map via the DOM0_PHYSICAL_MEMORY_MAP hypercall
1355 * and, while walking it, also finds the largest sub-4GB gap for
1356 * placing dynamic PCI resources (pci_mem_start).  The native path
1357 * walks e820 and nests code/data/crashkernel under each RAM region.
1352 */
1353 static void __init
1354 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1356 int i;
1357 #ifdef CONFIG_XEN
1358 dom0_op_t op;
1359 struct dom0_memory_map_entry *map;
1360 unsigned long gapstart, gapsize;
1361 unsigned long long last;
1362 #endif
1364 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1365 probe_roms();
1366 #endif
1368 #ifdef CONFIG_XEN
/* One page bounds the number of map entries we can receive. */
1369 map = alloc_bootmem_low_pages(PAGE_SIZE);
1370 op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
1371 op.u.physical_memory_map.memory_map = map;
1372 op.u.physical_memory_map.max_map_entries =
1373 PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
1374 BUG_ON(HYPERVISOR_dom0_op(&op));
/* Gap search defaults: minimum 4MB gap, fallback start at 256MB. */
1376 last = 0x100000000ULL;
1377 gapstart = 0x10000000;
1378 gapsize = 0x400000;
/* Walk the map backwards (descending addresses) tracking the largest
 * hole below 4GB between consecutive regions. */
1380 for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
1381 struct resource *res;
1383 if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
1384 gapsize = last - map[i].end;
1385 gapstart = map[i].end;
1387 if (map[i].start < last)
1388 last = map[i].start;
/* Resources above 4GB can't be represented here; skip them. */
1390 if (map[i].end > 0x100000000ULL)
1391 continue;
1392 res = alloc_bootmem_low(sizeof(struct resource));
1393 res->name = map[i].is_ram ? "System RAM" : "reserved";
1394 res->start = map[i].start;
1395 res->end = map[i].end - 1;
1396 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1397 request_resource(&iomem_resource, res);
1400 free_bootmem(__pa(map), PAGE_SIZE);
1402 /*
1403 * Start allocating dynamic PCI memory a bit into the gap,
1404 * aligned up to the nearest megabyte.
1406 * Question: should we try to pad it up a bit (do something
1407 * like " + (gapsize >> 3)" in there too?). We now have the
1408 * technology.
1409 */
1410 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1412 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1413 pci_mem_start, gapstart, gapsize);
1414 #else
1415 for (i = 0; i < e820.nr_map; i++) {
1416 struct resource *res;
/* Skip any region extending past 4GB (unrepresentable on i386). */
1417 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1418 continue;
1419 res = alloc_bootmem_low(sizeof(struct resource));
1420 switch (e820.map[i].type) {
1421 case E820_RAM: res->name = "System RAM"; break;
1422 case E820_ACPI: res->name = "ACPI Tables"; break;
1423 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1424 default: res->name = "reserved";
1426 res->start = e820.map[i].addr;
1427 res->end = res->start + e820.map[i].size - 1;
1428 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1429 request_resource(&iomem_resource, res);
1430 if (e820.map[i].type == E820_RAM) {
1431 /*
1432 * We don't know which RAM region contains kernel data,
1433 * so we try it repeatedly and let the resource manager
1434 * test it.
1435 */
1436 request_resource(res, code_resource);
1437 request_resource(res, data_resource);
1438 #ifdef CONFIG_KEXEC
1439 request_resource(res, &crashk_res);
1440 #endif
1443 #endif
1446 /*
1447 * Request address space for all standard resources
1448 *
1449 * XEN: only dom0 owns the physical I/O space, so unprivileged
1450 * domains return immediately.  The native (!CONFIG_XEN) tail also
1451 * scans e820 for the biggest sub-4GB gap to seed pci_mem_start;
1452 * under Xen that job is done in legacy_init_iomem_resources().
1448 */
1449 static void __init register_memory(void)
1451 #ifndef CONFIG_XEN
1452 unsigned long gapstart, gapsize, round;
1453 unsigned long long last;
1454 #endif
1455 int i;
1457 /* Nothing to do if not running in dom0. */
1458 if (!(xen_start_info->flags & SIF_INITDOMAIN))
1459 return;
1461 if (efi_enabled)
1462 efi_initialize_iomem_resources(&code_resource, &data_resource);
1463 else
1464 legacy_init_iomem_resources(&code_resource, &data_resource);
1466 /* EFI systems may still have VGA */
1467 request_resource(&iomem_resource, &video_ram_resource);
1469 /* request I/O space for devices used on all i[345]86 PCs */
1470 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1471 request_resource(&ioport_resource, &standard_io_resources[i]);
1473 #ifndef CONFIG_XEN
1474 /*
1475 * Search for the bigest gap in the low 32 bits of the e820
1476 * memory space.
1477 */
1478 last = 0x100000000ull;
1479 gapstart = 0x10000000;
1480 gapsize = 0x400000;
1481 i = e820.nr_map;
1482 while (--i >= 0) {
1483 unsigned long long start = e820.map[i].addr;
1484 unsigned long long end = start + e820.map[i].size;
1486 /*
1487 * Since "last" is at most 4GB, we know we'll
1488 * fit in 32 bits if this condition is true
1489 */
1490 if (last > end) {
1491 unsigned long gap = last - end;
1493 if (gap > gapsize) {
1494 gapsize = gap;
1495 gapstart = end;
1498 if (start < last)
1499 last = start;
1502 /*
1503 * See how much we want to round up: start off with
1504 * rounding to the next 1MB area.
1505 */
1506 round = 0x100000;
1507 while ((gapsize >> 4) > round)
1508 round += round;
1509 /* Fun with two's complement */
1510 pci_mem_start = (gapstart + round) & -round;
1512 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1513 pci_mem_start, gapstart, gapsize);
1514 #endif
1517 /* Use inline assembly to define this because the nops are defined
1518 as inline assembly strings in the include files and we cannot
1519 get them easily into strings. */
1520 asm("\t.data\nintelnops: "
1521 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1522 GENERIC_NOP7 GENERIC_NOP8);
1523 asm("\t.data\nk8nops: "
1524 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1525 K8_NOP7 K8_NOP8);
1526 asm("\t.data\nk7nops: "
1527 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1528 K7_NOP7 K7_NOP8);
/*
 * Each *nops blob concatenates the 1..8-byte nop encodings back to
 * back, so the n-byte nop starts at offset 1+2+...+(n-1).  The lookup
 * tables below exploit that: entry [n] points at the n-byte nop.
 * Entry [0] is NULL because there is no 0-byte nop.
 */
1530 extern unsigned char intelnops[], k8nops[], k7nops[];
1531 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1532 NULL,
1533 intelnops,
1534 intelnops + 1,
1535 intelnops + 1 + 2,
1536 intelnops + 1 + 2 + 3,
1537 intelnops + 1 + 2 + 3 + 4,
1538 intelnops + 1 + 2 + 3 + 4 + 5,
1539 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1540 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1541 };
1542 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1543 NULL,
1544 k8nops,
1545 k8nops + 1,
1546 k8nops + 1 + 2,
1547 k8nops + 1 + 2 + 3,
1548 k8nops + 1 + 2 + 3 + 4,
1549 k8nops + 1 + 2 + 3 + 4 + 5,
1550 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1551 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1552 };
1553 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1554 NULL,
1555 k7nops,
1556 k7nops + 1,
1557 k7nops + 1 + 2,
1558 k7nops + 1 + 2 + 3,
1559 k7nops + 1 + 2 + 3 + 4,
1560 k7nops + 1 + 2 + 3 + 4 + 5,
1561 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1562 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1563 };
/* CPU-feature -> nop-table mapping, scanned in order; first match
 * wins, and the sentinel {-1, NULL} falls back to intel_nops. */
1564 static struct nop {
1565 int cpuid;
1566 unsigned char **noptable;
1567 } noptypes[] = {
1568 { X86_FEATURE_K8, k8_nops },
1569 { X86_FEATURE_K7, k7_nops },
1570 { -1, NULL }
1571 };
1573 /* Replace instructions with better alternatives for this CPU type.
1575 This runs before SMP is initialized to avoid SMP problems with
1576 self modifying code. This implies that assymetric systems where
1577 APs have less capabilities than the boot processor are not handled.
1578 Tough. Make sure you disable such features by hand.
1579 
1580 @start, @end: bounds of the struct alt_instr array to process. */
1579 void apply_alternatives(void *start, void *end)
1581 struct alt_instr *a;
1582 int diff, i, k;
/* Default to generic Intel nops; upgrade if a K7/K8 feature is set. */
1583 unsigned char **noptable = intel_nops;
1584 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1585 if (boot_cpu_has(noptypes[i].cpuid)) {
1586 noptable = noptypes[i].noptable;
1587 break;
1590 for (a = start; (void *)a < end; a++) {
/* Only patch sites whose required CPU feature is present. */
1591 if (!boot_cpu_has(a->cpuid))
1592 continue;
1593 BUG_ON(a->replacementlen > a->instrlen);
1594 memcpy(a->instr, a->replacement, a->replacementlen);
1595 diff = a->instrlen - a->replacementlen;
1596 /* Pad the rest with nops */
/* Emit the largest nop (<= ASM_NOP_MAX bytes) per iteration until
 * the leftover space is filled. */
1597 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1598 k = diff;
1599 if (k > ASM_NOP_MAX)
1600 k = ASM_NOP_MAX;
1601 memcpy(a->instr + i, noptable[k], k);
/*
 * alternative_instructions - patch the kernel's own .altinstructions
 * section (bounds provided by the linker script) at boot.
 */
1606 void __init alternative_instructions(void)
1608 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1609 apply_alternatives(__alt_instructions, __alt_instructions_end);
1612 static char * __init machine_specific_memory_setup(void);
1614 #ifdef CONFIG_MCA
/* Record whether the machine has a MicroChannel bus; no-op stub when
 * CONFIG_MCA is off so callers need no #ifdef. */
1615 static void set_mca_bus(int x)
1617 MCA_bus = x;
1619 #else
1620 static void set_mca_bus(int x) { }
1621 #endif
1623 /*
1624 * Determine if we were loaded by an EFI loader. If so, then we have also been
1625 * passed the efi memmap, systab, etc., so we should use these data structures
1626 * for initialization. Note, the efi init code path is determined by the
1627 * global efi_enabled. This allows the same kernel image to be used on existing
1628 * systems (with a traditional BIOS) as well as on EFI systems.
1629 *
1630 * XEN: this is the paravirtualized setup_arch().  The statement order
1631 * below is load-bearing (bootmem before paging, paging before the
1632 * full P->M table, etc.) -- do not reorder casually.
1629 */
1630 void __init setup_arch(char **cmdline_p)
1632 int i, j, k, fpp;
1633 physdev_op_t op;
1634 unsigned long max_low_pfn;
1636 /* Force a quick death if the kernel panics (not domain 0). */
1637 extern int panic_timeout;
1638 if (!panic_timeout && !(xen_start_info->flags & SIF_INITDOMAIN))
1639 panic_timeout = 1;
1641 /* Register a call for panic conditions. */
1642 notifier_chain_register(&panic_notifier_list, &xen_panic_block);
/* Ask Xen to emulate 4GB segments and writable page tables, which the
 * hypervisor otherwise disallows for paravirtual guests. */
1644 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
1645 HYPERVISOR_vm_assist(VMASST_CMD_enable,
1646 VMASST_TYPE_writable_pagetables);
1648 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1649 early_cpu_init();
1651 /*
1652 * FIXME: This isn't an official loader_type right
1653 * now but does currently work with elilo.
1654 * If we were configured as an EFI kernel, check to make
1655 * sure that we were loaded correctly from elilo and that
1656 * the system table is valid. If not, then initialize normally.
1657 */
1658 #ifdef CONFIG_EFI
1659 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1660 efi_enabled = 1;
1661 #endif
1663 /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1664 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1665 */
1666 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
/* Copy boot-parameter data out of the (soon recycled) boot pages. */
1667 drive_info = DRIVE_INFO;
1668 screen_info = SCREEN_INFO;
1669 edid_info = EDID_INFO;
1670 apm_info.bios = APM_BIOS_INFO;
1671 ist_info = IST_INFO;
1672 saved_videomode = VIDEO_MODE;
1673 if( SYS_DESC_TABLE.length != 0 ) {
1674 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1675 machine_id = SYS_DESC_TABLE.table[0];
1676 machine_submodel_id = SYS_DESC_TABLE.table[1];
1677 BIOS_revision = SYS_DESC_TABLE.table[2];
1679 bootloader_type = LOADER_TYPE;
/* XEN: no real BIOS ran, so fabricate sane console parameters. */
1681 if (xen_start_info->flags & SIF_INITDOMAIN) {
1682 /* This is drawn from a dump from vgacon:startup in
1683 * standard Linux. */
1684 screen_info.orig_video_mode = 3;
1685 screen_info.orig_video_isVGA = 1;
1686 screen_info.orig_video_lines = 25;
1687 screen_info.orig_video_cols = 80;
1688 screen_info.orig_video_ega_bx = 3;
1689 screen_info.orig_video_points = 16;
1690 } else
1691 screen_info.orig_video_isVGA = 0;
1693 #ifdef CONFIG_BLK_DEV_RAM
1694 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1695 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1696 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1697 #endif
1699 setup_xen_features();
1701 ARCH_SETUP
1702 if (efi_enabled)
1703 efi_init();
1704 else {
1705 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1706 print_memory_map(machine_specific_memory_setup());
1709 copy_edd();
1711 if (!MOUNT_ROOT_RDONLY)
1712 root_mountflags &= ~MS_RDONLY;
1713 init_mm.start_code = (unsigned long) _text;
1714 init_mm.end_code = (unsigned long) _etext;
1715 init_mm.end_data = (unsigned long) _edata;
/* Heap starts just past the domain builder's initial page tables. */
1716 init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
1717 xen_start_info->nr_pt_frames) << PAGE_SHIFT;
1719 /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
1720 /*code_resource.start = virt_to_phys(_text);*/
1721 /*code_resource.end = virt_to_phys(_etext)-1;*/
1722 /*data_resource.start = virt_to_phys(_etext);*/
1723 /*data_resource.end = virt_to_phys(_edata)-1;*/
1725 parse_cmdline_early(cmdline_p);
1727 max_low_pfn = setup_memory();
1729 /*
1730 * NOTE: before this point _nobody_ is allowed to allocate
1731 * any memory using the bootmem allocator. Although the
1732 * alloctor is now initialised only the first 8Mb of the kernel
1733 * virtual address space has been mapped. All allocations before
1734 * paging_init() has completed must use the alloc_bootmem_low_pages()
1735 * variant (which allocates DMA'able memory) and care must be taken
1736 * not to exceed the 8Mb limit.
1737 */
1739 #ifdef CONFIG_SMP
1740 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1741 #endif
1742 paging_init();
1743 remapped_pgdat_init();
1744 sparse_init();
1745 zone_sizes_init();
1747 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1748 /*
1749 * Find and reserve possible boot-time SMP configuration:
1750 */
1751 find_smp_config();
1752 #endif
1754 /* Make sure we have a correctly sized P->M table. */
/* The builder's mfn_list only covers nr_pages; allocate a max_pfn-
 * sized table (unknown entries = ~0), copy the list in, then return
 * the old list's pages to bootmem. */
1755 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1756 phys_to_machine_mapping = alloc_bootmem_low_pages(
1757 max_pfn * sizeof(unsigned long));
1758 memset(phys_to_machine_mapping, ~0,
1759 max_pfn * sizeof(unsigned long));
1760 memcpy(phys_to_machine_mapping,
1761 (unsigned long *)xen_start_info->mfn_list,
1762 xen_start_info->nr_pages * sizeof(unsigned long));
1763 free_bootmem(
1764 __pa(xen_start_info->mfn_list),
1765 PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
1766 sizeof(unsigned long))));
1768 /*
1769 * Initialise the list of the frames that specify the list of
1770 * frames that make up the p2m table. Used by save/restore
1771 */
1772 pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
1773 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
1774 virt_to_mfn(pfn_to_mfn_frame_list_list);
/* fpp = PFNs described per frame; build the two-level frame list.
 * k indexes the top level (BUG if it would exceed its 16 slots). */
1776 fpp = PAGE_SIZE/sizeof(unsigned long);
1777 for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
1778 if ((j % fpp) == 0) {
1779 k++;
1780 BUG_ON(k>=16);
1781 pfn_to_mfn_frame_list[k] =
1782 alloc_bootmem_low_pages(PAGE_SIZE);
1783 pfn_to_mfn_frame_list_list[k] =
1784 virt_to_mfn(pfn_to_mfn_frame_list[k]);
1785 j=0;
1787 pfn_to_mfn_frame_list[k][j] =
1788 virt_to_mfn(&phys_to_machine_mapping[i]);
1790 HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
1793 /*
1794 * NOTE: at this point the bootmem allocator is fully available.
1795 */
1797 #ifdef CONFIG_EARLY_PRINTK
1799 char *s = strstr(*cmdline_p, "earlyprintk=");
1800 if (s) {
1801 extern void setup_early_printk(char *);
1803 setup_early_printk(strchr(s, '=') + 1);
1804 printk("early console enabled\n");
1807 #endif
1809 if (xen_start_info->flags & SIF_INITDOMAIN)
1810 dmi_scan_machine();
1812 #ifdef CONFIG_X86_GENERICARCH
1813 generic_apic_probe(*cmdline_p);
1814 #endif
1815 if (efi_enabled)
1816 efi_map_memmap();
/* Raise our I/O privilege level so dom0 drivers may touch ports. */
1818 op.cmd = PHYSDEVOP_SET_IOPL;
1819 op.u.set_iopl.iopl = 1;
1820 HYPERVISOR_physdev_op(&op);
1822 #ifdef CONFIG_X86_IO_APIC
1823 check_acpi_pci(); /* Checks more than just ACPI actually */
1824 #endif
1826 #ifdef CONFIG_ACPI
/* Only dom0 owns the firmware; disable ACPI everywhere else. */
1827 if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
1828 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1829 acpi_disabled = 1;
1830 acpi_ht = 0;
1833 /*
1834 * Parse the ACPI tables for possible boot-time SMP configuration.
1835 */
1836 acpi_boot_table_init();
1837 acpi_boot_init();
1839 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
1840 if (def_to_bigsmp)
1841 printk(KERN_WARNING "More than 8 CPUs detected and "
1842 "CONFIG_X86_PC cannot handle it.\nUse "
1843 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
1844 #endif
1845 #endif
1846 #ifdef CONFIG_X86_LOCAL_APIC
1847 if (smp_found_config)
1848 get_smp_config();
1849 #endif
1851 /* XXX Disable irqdebug until we have a way to avoid interrupt
1852 * conflicts. */
1853 noirqdebug_setup("");
1855 register_memory();
1857 if (xen_start_info->flags & SIF_INITDOMAIN) {
1858 if (!(xen_start_info->flags & SIF_PRIVILEGED))
1859 panic("Xen granted us console access "
1860 "but not privileged status");
1862 #ifdef CONFIG_VT
1863 #if defined(CONFIG_VGA_CONSOLE)
1864 if (!efi_enabled ||
1865 (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1866 conswitchp = &vga_con;
1867 #elif defined(CONFIG_DUMMY_CONSOLE)
1868 conswitchp = &dummy_con;
1869 #endif
1870 #endif
1871 } else {
/* DomU: no VGA hardware; force the Xen virtual console instead. */
1872 extern int console_use_vt;
1873 console_use_vt = 0;
/*
 * xen_panic_event - panic-notifier callback (registered on
 * panic_notifier_list in setup_arch): tell the hypervisor we crashed
 * so it can tear the domain down immediately.
 */
1877 static int
1878 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1880 HYPERVISOR_shutdown(SHUTDOWN_crash);
1881 /* we're never actually going to get here... */
1882 return NOTIFY_DONE;
1885 #include "setup_arch_post.h"
1886 /*
1887 * Local Variables:
1888 * mode:c
1889 * c-file-style:"k&r"
1890 * c-basic-offset:8
1891 * End:
1892 */