ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c @ 6294:1a0723cd37f1

Fix many uses of machine addresses in XenLinux. Primarily
this fixes users of virt_to_machine/machine_to_virt to
use virt_to_mfn/mfn_to_virt where that is more appropriate.

This should be a big step to improved PAE stability.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Aug 19 16:06:43 2005 +0000 (2005-08-19)
parents d25da0ddd9d5
children f51fe43c5d1c 5f4724c13040 81576d3d1ca8 3a8f27c6d56c
line source
1 /*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
22 /*
23 * This file handles the architecture-dependent parts of initialization
24 */
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/nodemask.h>
44 #include <linux/kernel.h>
45 #include <linux/percpu.h>
46 #include <linux/notifier.h>
47 #include <video/edid.h>
48 #include <asm/e820.h>
49 #include <asm/mpspec.h>
50 #include <asm/setup.h>
51 #include <asm/arch_hooks.h>
52 #include <asm/sections.h>
53 #include <asm/io_apic.h>
54 #include <asm/ist.h>
55 #include <asm/io.h>
56 #include <asm-xen/hypervisor.h>
57 #include <asm-xen/xen-public/physdev.h>
58 #include "setup_arch_pre.h"
59 #include <bios_ebda.h>
/* Allows setting of maximum possible memory size */
static unsigned long xen_override_max_pfn;

/* Panic notifier: forwards Linux panics to the hypervisor (defined later). */
static int xen_panic_event(struct notifier_block *, unsigned long, void *);
static struct notifier_block xen_panic_block = {
	xen_panic_event, NULL, 0 /* try to go last */
};

/* Set by "mem=nopentium" to disable 4MB pages (X86_FEATURE_PSE). */
int disable_pse __initdata = 0;

/*
 * Machine setup..
 */

#ifdef CONFIG_EFI
int efi_enabled = 0;
EXPORT_SYMBOL(efi_enabled);
#endif

/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
/* common cpu data for all cpus */
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };

unsigned long mmu_cr4_features;

#ifdef CONFIG_ACPI_INTERPRETER
int acpi_disabled = 0;
#else
int acpi_disabled = 1;
#endif
EXPORT_SYMBOL(acpi_disabled);

#ifdef CONFIG_ACPI_BOOT
int __initdata acpi_force = 0;
extern acpi_interrupt_flags acpi_sci_flags;
#endif

/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
unsigned int machine_submodel_id;
unsigned int BIOS_revision;
unsigned int mca_pentium_flag;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0x10000000;

/* Boot loader ID as an integer, for the benefit of proc_dointvec */
int bootloader_type;

/* user-defined highmem size; unsigned, so -1 acts as a "not set" sentinel */
static unsigned int highmem_pages = -1;

/*
 * Setup options
 */
struct drive_info_struct { char dummy[32]; } drive_info;
struct screen_info screen_info;
struct apm_info apm_info;
struct sys_desc_table_struct {
	unsigned short length;
	unsigned char table[0];
};
struct edid_info edid_info;
struct ist_info ist_info;
struct e820map e820;

extern void early_cpu_init(void);
extern void dmi_scan_machine(void);
extern void generic_apic_probe(char *);
extern int root_mountflags;

unsigned long saved_videomode;

/* Layout of the 16-bit ramdisk flags word from the boot loader. */
#define RAMDISK_IMAGE_START_MASK	0x07FF
#define RAMDISK_PROMPT_FLAG		0x8000
#define RAMDISK_LOAD_FLAG		0x4000

/* Command line after boot-time options have been stripped by
   parse_cmdline_early(). */
static char command_line[COMMAND_LINE_SIZE];

unsigned char __initdata boot_params[PARAM_SIZE];
/* Kernel image regions; start/end are filled in at boot (see setup code
   later in this file). */
static struct resource data_resource = {
	.name	= "Kernel data",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

static struct resource code_resource = {
	.name	= "Kernel code",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

#ifdef CONFIG_XEN_PRIVILEGED_GUEST
/* Legacy PC ROM windows, claimed by probe_roms() only when running as
   the privileged (dom0) guest. */
static struct resource system_rom_resource = {
	.name	= "System ROM",
	.start	= 0xf0000,
	.end	= 0xfffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource extension_rom_resource = {
	.name	= "Extension ROM",
	.start	= 0xe0000,
	.end	= 0xeffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/* Up to six adapter ROMs discovered by probe_roms(); only slot 0 has a
   fixed historical start address. */
static struct resource adapter_rom_resources[] = { {
	.name	= "Adapter ROM",
	.start	= 0xc8000,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };

#define ADAPTER_ROM_RESOURCES \
	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])

static struct resource video_rom_resource = {
	.name	= "Video ROM",
	.start	= 0xc0000,
	.end	= 0xc7fff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
#endif
/* Legacy VGA framebuffer window. */
static struct resource video_ram_resource = {
	.name	= "Video RAM area",
	.start	= 0xa0000,
	.end	= 0xbffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
};

/* Fixed legacy PC I/O ports, registered unconditionally at boot. */
static struct resource standard_io_resources[] = { {
	.name	= "dma1",
	.start	= 0x0000,
	.end	= 0x001f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "pic1",
	.start	= 0x0020,
	.end	= 0x0021,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "timer0",
	.start	= 0x0040,
	.end	= 0x0043,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "timer1",
	.start	= 0x0050,
	.end	= 0x0053,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "keyboard",
	.start	= 0x0060,
	.end	= 0x006f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma page reg",
	.start	= 0x0080,
	.end	= 0x008f,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "pic2",
	.start	= 0x00a0,
	.end	= 0x00a1,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "dma2",
	.start	= 0x00c0,
	.end	= 0x00df,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
}, {
	.name	= "fpu",
	.start	= 0x00f0,
	.end	= 0x00ff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
} };

#define STANDARD_IO_RESOURCES \
	(sizeof standard_io_resources / sizeof standard_io_resources[0])

#ifdef CONFIG_XEN_PRIVILEGED_GUEST
/* A ROM image begins with the signature word 0xaa55. */
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
275 static int __init romchecksum(unsigned char *rom, unsigned long length)
276 {
277 unsigned char *p, sum = 0;
279 for (p = rom; p < rom + length; p++)
280 sum += *p;
281 return sum == 0;
282 }
/*
 * Scan the legacy PC ROM areas (video ROM, system ROM, extension ROM,
 * adapter ROMs) and register each valid image with iomem_resource.
 * Runs only in the privileged domain, since only dom0 sees real ROMs.
 */
static void __init probe_roms(void)
{
	unsigned long start, length, upper;
	unsigned char *rom;
	int i;

	/* Nothing to do if not running in dom0. */
	if (!(xen_start_info.flags & SIF_INITDOMAIN))
		return;

	/* video rom: scan on 2k boundaries below the first adapter slot */
	upper = adapter_rom_resources[0].start;
	for (start = video_rom_resource.start; start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		video_rom_resource.start = start;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* if checksum okay, trust length byte */
		if (length && romchecksum(rom, length))
			video_rom_resource.end = start + length - 1;

		request_resource(&iomem_resource, &video_rom_resource);
		break;
	}

	/* resume adapter scan just past the video ROM, 2k-aligned */
	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
	if (start < upper)
		start = upper;

	/* system rom */
	request_resource(&iomem_resource, &system_rom_resource);
	upper = system_rom_resource.start;

	/* check for extension rom (ignore length byte!) */
	rom = isa_bus_to_virt(extension_rom_resource.start);
	if (romsignature(rom)) {
		length = extension_rom_resource.end - extension_rom_resource.start + 1;
		if (romchecksum(rom, length)) {
			request_resource(&iomem_resource, &extension_rom_resource);
			upper = extension_rom_resource.start;
		}
	}

	/* check for adapter roms on 2k boundaries */
	for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
		rom = isa_bus_to_virt(start);
		if (!romsignature(rom))
			continue;

		/* 0 < length <= 0x7f * 512, historically */
		length = rom[2] * 512;

		/* but accept any length that fits if checksum okay */
		if (!length || start + length > upper || !romchecksum(rom, length))
			continue;

		adapter_rom_resources[i].start = start;
		adapter_rom_resources[i].end = start + length - 1;
		request_resource(&iomem_resource, &adapter_rom_resources[i]);

		/* i advances only on a hit; next probe starts after this ROM */
		start = adapter_rom_resources[i++].end & ~2047UL;
	}
}
#endif
/*
 * Point at the empty zero page to start with. We map the real shared_info
 * page as soon as fixmap is up and running.
 */
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
EXPORT_SYMBOL(HYPERVISOR_shared_info);

/* pfn -> mfn translation table and the frame list Xen uses to find it. */
unsigned int *phys_to_machine_mapping, *pfn_to_mfn_frame_list;
EXPORT_SYMBOL(phys_to_machine_mapping);

/* Raw start-of-day parameters from the hypervisor. */
union xen_start_info_union xen_start_info_union;
/*
 * Truncate the boot memory map so that no usable RAM extends beyond
 * 'size' bytes.  The first RAM/conventional region that crosses the
 * limit is shrunk and becomes the last map entry.
 * NOTE(review): when efi_enabled but no EFI region crosses the limit,
 * control falls through to the e820 loop as well — presumably harmless
 * since only one map is populated; confirm against callers.
 */
static void __init limit_regions(unsigned long long size)
{
	unsigned long long current_addr = 0;
	int i;

	if (efi_enabled) {
		for (i = 0; i < memmap.nr_map; i++) {
			current_addr = memmap.map[i].phys_addr +
				(memmap.map[i].num_pages << 12);
			if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
				if (current_addr >= size) {
					/* trim the overhang, rounded up to whole pages */
					memmap.map[i].num_pages -=
						(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
					memmap.nr_map = i + 1;
					return;
				}
			}
		}
	}
	for (i = 0; i < e820.nr_map; i++) {
		if (e820.map[i].type == E820_RAM) {
			current_addr = e820.map[i].addr + e820.map[i].size;
			if (current_addr >= size) {
				e820.map[i].size -= current_addr-size;
				e820.nr_map = i + 1;
				return;
			}
		}
	}
}
398 static void __init add_memory_region(unsigned long long start,
399 unsigned long long size, int type)
400 {
401 int x;
403 if (!efi_enabled) {
404 x = e820.nr_map;
406 if (x == E820MAX) {
407 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
408 return;
409 }
411 e820.map[x].addr = start;
412 e820.map[x].size = size;
413 e820.map[x].type = type;
414 e820.nr_map++;
415 }
416 } /* add_memory_region */
418 #define E820_DEBUG 1
420 static void __init print_memory_map(char *who)
421 {
422 int i;
424 for (i = 0; i < e820.nr_map; i++) {
425 printk(" %s: %016Lx - %016Lx ", who,
426 e820.map[i].addr,
427 e820.map[i].addr + e820.map[i].size);
428 switch (e820.map[i].type) {
429 case E820_RAM: printk("(usable)\n");
430 break;
431 case E820_RESERVED:
432 printk("(reserved)\n");
433 break;
434 case E820_ACPI:
435 printk("(ACPI data)\n");
436 break;
437 case E820_NVS:
438 printk("(ACPI NVS)\n");
439 break;
440 default: printk("type %lu\n", e820.map[i].type);
441 break;
442 }
443 }
444 }
#if 0
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries. The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
/* A change point marks where a BIOS region starts or ends. */
struct change_member {
	struct e820entry *pbios; /* pointer to original bios entry */
	unsigned long long addr; /* address for this change point */
};
/* __initdata scratch space for sanitize_e820_map() below. */
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
/*
 * Rebuild 'biosmap' (length *pnr_map, updated in place) without
 * overlaps, using a sweep over sorted start/end "change points".
 * Returns 0 on success, -1 if the map is too small or malformed.
 * (Dead code here: the enclosing #if 0 disables it for Xen.)
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
	Visually we're performing the following (1,2,3,4 = memory types)...

	Sample memory map (w/overlaps):
	   ____22__________________
	   ______________________4_
	   ____1111________________
	   _44_____________________
	   11111111________________
	   ____________________33__
	   ___________44___________
	   __________33333_________
	   ______________22________
	   ___________________2222_
	   _________111111111______
	   _____________________11_
	   _________________4______

	Sanitized equivalent (no overlap):
	   1_______________________
	   _44_____________________
	   ___1____________________
	   ____22__________________
	   ______11________________
	   _________1______________
	   __________3_____________
	   ___________44___________
	   _____________33_________
	   _______________2________
	   ________________1_______
	   _________________4______
	   ___________________2____
	   ____________________33__
	   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;    	/* true number of change-points */

	/* sort change-point list by memory addresses (low -> high) */
	/* (simple bubble sort; the list is small) */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i=1; i < chg_nr; i++)  {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	 /* number of entries in the overlap table */
	new_bios_entry=0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break; 	/* no more space left for new bios entries */
			}
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;   /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 *
 * We check to see that the memory map contains at least 2 elements
 * before we'll use it, because the detection code in setup.S may
 * not be perfect and most every PC known to man has two memory
 * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
 * thinkpad 560x, for example, does not cooperate with the memory
 * detection code.)
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM) {
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				/* keep the below-640k part, drop the hole,
				   resume at 1M */
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				if (end <= 0x100000ULL)
					continue;
				start = 0x100000ULL;
				size = end - start;
			}
		}
		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);
	return 0;
}
#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
     memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
     memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
     edd.mbr_signature_nr = EDD_MBR_SIG_NR;
     edd.edd_info_nr = EDD_NR;
}
#else
/* No EDD support configured: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
/*
 * Do NOT EVER look at the BIOS memory size location.
 * It does not work on many machines.
 */
#define LOWMEMSIZE()	(0x9f000)

/*
 * Copy the Xen-provided command line into saved_command_line, scan it
 * for early boot options (mem=, memmap=, acpi=, highmem=, vmalloc=, ...),
 * and write the remaining options into command_line, which *cmdline_p
 * is pointed at.  mem= and memmap= tokens are removed from the copied
 * line (the "to--" backtracking below).
 */
static void __init parse_cmdline_early (char ** cmdline_p)
{
	char c = ' ', *to = command_line, *from = saved_command_line;
	int len = 0, max_cmdline;
	int userdef = 0;

	if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
		max_cmdline = COMMAND_LINE_SIZE;
	memcpy(saved_command_line, xen_start_info.cmd_line, max_cmdline);
	/* Save unparsed command line copy for /proc/cmdline */
	saved_command_line[max_cmdline-1] = '\0';

	for (;;) {
		/* options are only recognized at a word boundary */
		if (c != ' ')
			goto next_char;
		/*
		 * "mem=nopentium" disables the 4MB page tables.
		 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
		 * to <mem>, overriding the bios size.
		 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
		 * <start> to <start>+<mem>, overriding the bios size.
		 *
		 * HPA tells me bootloaders need to parse mem=, so no new
		 * option should be mem=  [also see Documentation/i386/boot.txt]
		 */
		if (!memcmp(from, "mem=", 4)) {
			if (to != command_line)
				to--;
			if (!memcmp(from+4, "nopentium", 9)) {
				from += 9+4;
				clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
				disable_pse = 1;
			} else {
				/* If the user specifies memory size, we
				 * limit the BIOS-provided memory map to
				 * that size. exactmap can be used to specify
				 * the exact map. mem=number can be used to
				 * trim the existing memory map.
				 */
				unsigned long long mem_size;

				mem_size = memparse(from+4, &from);
#if 0
				limit_regions(mem_size);
				userdef=1;
#else
				/* XEN: record the override; find_max_pfn()
				   applies it instead of trimming e820 */
				xen_override_max_pfn =
					(unsigned long)(mem_size>>PAGE_SHIFT);
#endif
			}
		}

		else if (!memcmp(from, "memmap=", 7)) {
			if (to != command_line)
				to--;
			if (!memcmp(from+7, "exactmap", 8)) {
				from += 8+7;
				e820.nr_map = 0;
				userdef = 1;
			} else {
				/* If the user specifies memory size, we
				 * limit the BIOS-provided memory map to
				 * that size. exactmap can be used to specify
				 * the exact map. mem=number can be used to
				 * trim the existing memory map.
				 */
				unsigned long long start_at, mem_size;

				mem_size = memparse(from+7, &from);
				if (*from == '@') {
					start_at = memparse(from+1, &from);
					add_memory_region(start_at, mem_size, E820_RAM);
				} else if (*from == '#') {
					start_at = memparse(from+1, &from);
					add_memory_region(start_at, mem_size, E820_ACPI);
				} else if (*from == '$') {
					start_at = memparse(from+1, &from);
					add_memory_region(start_at, mem_size, E820_RESERVED);
				} else {
					limit_regions(mem_size);
					userdef=1;
				}
			}
		}

		else if (!memcmp(from, "noexec=", 7))
			noexec_setup(from + 7);


#ifdef  CONFIG_X86_MPPARSE
		/*
		 * If the BIOS enumerates physical processors before logical,
		 * maxcpus=N at enumeration-time can be used to disable HT.
		 */
		else if (!memcmp(from, "maxcpus=", 8)) {
			extern unsigned int maxcpus;

			maxcpus = simple_strtoul(from + 8, NULL, 0);
		}
#endif

#ifdef CONFIG_ACPI_BOOT
		/* "acpi=off" disables both ACPI table parsing and interpreter */
		else if (!memcmp(from, "acpi=off", 8)) {
			disable_acpi();
		}

		/* acpi=force to over-ride black-list */
		else if (!memcmp(from, "acpi=force", 10)) {
			acpi_force = 1;
			acpi_ht = 1;
			acpi_disabled = 0;
		}

		/* acpi=strict disables out-of-spec workarounds */
		else if (!memcmp(from, "acpi=strict", 11)) {
			acpi_strict = 1;
		}

		/* Limit ACPI just to boot-time to enable HT */
		else if (!memcmp(from, "acpi=ht", 7)) {
			if (!acpi_force)
				disable_acpi();
			acpi_ht = 1;
		}

		/* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
		else if (!memcmp(from, "pci=noacpi", 10)) {
			acpi_disable_pci();
		}
		/* "acpi=noirq" disables ACPI interrupt routing */
		else if (!memcmp(from, "acpi=noirq", 10)) {
			acpi_noirq_set();
		}

		else if (!memcmp(from, "acpi_sci=edge", 13))
			acpi_sci_flags.trigger =  1;

		else if (!memcmp(from, "acpi_sci=level", 14))
			acpi_sci_flags.trigger = 3;

		else if (!memcmp(from, "acpi_sci=high", 13))
			acpi_sci_flags.polarity = 1;

		else if (!memcmp(from, "acpi_sci=low", 12))
			acpi_sci_flags.polarity = 3;

#ifdef CONFIG_X86_IO_APIC
		else if (!memcmp(from, "acpi_skip_timer_override", 24))
			acpi_skip_timer_override = 1;
#endif

#ifdef CONFIG_X86_LOCAL_APIC
		/* disable IO-APIC */
		else if (!memcmp(from, "noapic", 6))
			disable_ioapic_setup();
#endif /* CONFIG_X86_LOCAL_APIC */
#endif /* CONFIG_ACPI_BOOT */

		/*
		 * highmem=size forces highmem to be exactly 'size' bytes.
		 * This works even on boxes that have no highmem otherwise.
		 * This also works to reduce highmem size on bigger boxes.
		 */
		else if (!memcmp(from, "highmem=", 8))
			highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;

		/*
		 * vmalloc=size forces the vmalloc area to be exactly 'size'
		 * bytes. This can be used to increase (or decrease) the
		 * vmalloc area - the default is 128m.
		 */
		else if (!memcmp(from, "vmalloc=", 8))
			__VMALLOC_RESERVE = memparse(from+8, &from);

	next_char:
		c = *(from++);
		if (!c)
			break;
		if (COMMAND_LINE_SIZE <= ++len)
			break;
		*(to++) = c;
	}
	*to = '\0';
	*cmdline_p = command_line;
	if (userdef) {
		printk(KERN_INFO "user-defined physical RAM map:\n");
		print_memory_map("user");
	}
}
#if 0 /* !XEN */
/*
 * Callback for efi_memory_walk.
 * Raises *arg (the running max pfn) to cover [start, end).
 */
static int __init
efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
{
	unsigned long *max_pfn = arg, pfn;

	if (start < end) {
		pfn = PFN_UP(end -1);
		if (pfn > *max_pfn)
			*max_pfn = pfn;
	}
	return 0;
}
/*
 * Find the highest page frame number we have available
 * (native version, disabled by the surrounding #if 0 for Xen).
 */
void __init find_max_pfn(void)
{
	int i;

	max_pfn = 0;
	if (efi_enabled) {
		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
		return;
	}

	for (i = 0; i < e820.nr_map; i++) {
		unsigned long start, end;
		/* RAM? */
		if (e820.map[i].type != E820_RAM)
			continue;
		/* round partial pages inward */
		start = PFN_UP(e820.map[i].addr);
		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
		if (start >= end)
			continue;
		if (end > max_pfn)
			max_pfn = end;
	}
}
#else
933 /* We don't use the fake e820 because we need to respond to user override. */
934 void __init find_max_pfn(void)
935 {
936 if ( xen_override_max_pfn < xen_start_info.nr_pages )
937 xen_override_max_pfn = xen_start_info.nr_pages;
938 max_pfn = xen_override_max_pfn;
939 }
940 #endif /* XEN */
/*
 * Determine low and high memory ranges:
 * Returns the highest directly-mapped pfn; may also clamp max_pfn and
 * highmem_pages as a side effect.  Note highmem_pages is unsigned, so
 * its -1 initializer serves as a "not specified" sentinel.
 */
unsigned long __init find_max_low_pfn(void)
{
	unsigned long max_low_pfn;

	max_low_pfn = max_pfn;
	if (max_low_pfn > MAXMEM_PFN) {
		/* more RAM than lowmem can map */
		if (highmem_pages == -1)
			highmem_pages = max_pfn - MAXMEM_PFN;
		if (highmem_pages + MAXMEM_PFN < max_pfn)
			max_pfn = MAXMEM_PFN + highmem_pages;
		if (highmem_pages + MAXMEM_PFN > max_pfn) {
			printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
			highmem_pages = 0;
		}
		max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
		/* Maximum memory usable is what is directly addressable */
		printk(KERN_WARNING "Warning only %ldMB will be used.\n",
					MAXMEM>>20);
		if (max_pfn > MAX_NONPAE_PFN)
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		else
			printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
		max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
		if (max_pfn > MAX_NONPAE_PFN) {
			max_pfn = MAX_NONPAE_PFN;
			printk(KERN_WARNING "Warning only 4GB will be used.\n");
			printk(KERN_WARNING "Use a PAE enabled kernel.\n");
		}
#endif /* !CONFIG_X86_PAE */
#endif /* !CONFIG_HIGHMEM */
	} else {
		/* everything fits in lowmem */
		if (highmem_pages == -1)
			highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
		if (highmem_pages >= max_pfn) {
			printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
			highmem_pages = 0;
		}
		if (highmem_pages) {
			/* keep at least 64MB of lowmem */
			if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
				printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
				highmem_pages = 0;
			}
			max_low_pfn -= highmem_pages;
		}
#else
		if (highmem_pages)
			printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
	}
	return max_low_pfn;
}
1001 /*
1002 * Free all available memory for boot time allocation. Used
1003 * as a callback function by efi_memory_walk()
1004 */
1006 static int __init
1007 free_available_memory(unsigned long start, unsigned long end, void *arg)
1009 /* check max_low_pfn */
1010 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1011 return 0;
1012 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1013 end = (max_low_pfn + 1) << PAGE_SHIFT;
1014 if (start < end)
1015 free_bootmem(start, end - start);
1017 return 0;
1019 /*
1020 * Register fully available low RAM pages with the bootmem allocator.
1021 */
1022 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1024 int i;
1026 if (efi_enabled) {
1027 efi_memmap_walk(free_available_memory, NULL);
1028 return;
1030 for (i = 0; i < e820.nr_map; i++) {
1031 unsigned long curr_pfn, last_pfn, size;
1032 /*
1033 * Reserve usable low memory
1034 */
1035 if (e820.map[i].type != E820_RAM)
1036 continue;
1037 /*
1038 * We are rounding up the start address of usable memory:
1039 */
1040 curr_pfn = PFN_UP(e820.map[i].addr);
1041 if (curr_pfn >= max_low_pfn)
1042 continue;
1043 /*
1044 * ... and at the end of the usable range downwards:
1045 */
1046 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1048 if (last_pfn > max_low_pfn)
1049 last_pfn = max_low_pfn;
1051 /*
1052 * .. finally, did all the rounding and playing
1053 * around just make the area go away?
1054 */
1055 if (last_pfn <= curr_pfn)
1056 continue;
1058 size = last_pfn - curr_pfn;
1059 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1063 #ifndef CONFIG_XEN
1064 /*
1065 * workaround for Dell systems that neglect to reserve EBDA
1066 */
1067 static void __init reserve_ebda_region(void)
1069 unsigned int addr;
1070 addr = get_bios_ebda();
1071 if (addr)
1072 reserve_bootmem(addr, PAGE_SIZE);
1074 #endif
1076 #ifndef CONFIG_DISCONTIGMEM
1077 void __init setup_bootmem_allocator(void);
1078 static unsigned long __init setup_memory(void)
1081 /*
1082 * partially used pages are not usable - thus
1083 * we are rounding upwards:
1084 */
1085 min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
1087 find_max_pfn();
1089 max_low_pfn = find_max_low_pfn();
1091 #ifdef CONFIG_HIGHMEM
1092 highstart_pfn = highend_pfn = max_pfn;
1093 if (max_pfn > max_low_pfn) {
1094 highstart_pfn = max_low_pfn;
1096 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1097 pages_to_mb(highend_pfn - highstart_pfn));
1098 #endif
1099 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1100 pages_to_mb(max_low_pfn));
1102 setup_bootmem_allocator();
1104 return max_low_pfn;
/*
 * Populate the per-zone page counts and hand them to free_area_init().
 * Zone sizes are expressed in pages, measured from the start of the
 * previous zone (DMA -> NORMAL -> HIGHMEM).
 */
1107 void __init zone_sizes_init(void)
1109 unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
1110 unsigned int max_dma, low;
1112 /*
1113  * XEN: Our notion of "DMA memory" is fake when running over Xen.
1114  * We simply put all RAM in the DMA zone so that those drivers which
1115  * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
1116  * Those drivers that *do* require lowmem are screwed anyway when
1117  * running over Xen!
1118  */
1119 max_dma = max_low_pfn;
1120 low = max_low_pfn;
/* with max_dma == low the else-branch leaves ZONE_NORMAL at zero pages */
1122 if (low < max_dma)
1123 zones_size[ZONE_DMA] = low;
1124 else {
1125 zones_size[ZONE_DMA] = max_dma;
1126 zones_size[ZONE_NORMAL] = low - max_dma;
1127 #ifdef CONFIG_HIGHMEM
1128 zones_size[ZONE_HIGHMEM] = highend_pfn - low;
1129 #endif
1131 free_area_init(zones_size);
1133 #else
/* CONFIG_DISCONTIGMEM: NUMA variants are provided elsewhere */
1134 extern unsigned long setup_memory(void);
1135 extern void zone_sizes_init(void);
1136 #endif /* !CONFIG_DISCONTIGMEM */
/*
 * Bring up the boot-time (bootmem) page allocator over low memory and
 * carve out every region that must never be handed to the kernel:
 * the bootmem bitmap itself, legacy BIOS areas (native only), the
 * initrd, and (XEN) wire up the initial phys-to-machine mapping that
 * the hypervisor passed in via xen_start_info.
 */
1138 void __init setup_bootmem_allocator(void)
1140 unsigned long bootmap_size;
1141 /*
1142  * Initialize the boot-time allocator (with low memory only):
1143  */
1144 bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
/* mark all usable e820 RAM below max_low_pfn as free in the bitmap */
1146 register_bootmem_low_pages(max_low_pfn);
1148 /*
1149  * Reserve the bootmem bitmap itself as well. We do this in two
1150  * steps (first step was init_bootmem()) because this catches
1151  * the (very unlikely) case of us accidentally initializing the
1152  * bootmem allocator with an invalid RAM area.
1153  */
1154 reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
1155 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
1157 #ifndef CONFIG_XEN
1158 /*
1159  * reserve physical page 0 - it's a special BIOS page on many boxes,
1160  * enabling clean reboots, SMP operation, laptop functions.
1161  */
1162 reserve_bootmem(0, PAGE_SIZE);
1164 /* reserve EBDA region, it's a 4K region */
1165 reserve_ebda_region();
1167 /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
1168 PCI prefetch into it (errata #56). Usually the page is reserved anyways,
1169 unless you have no PS/2 mouse plugged in. */
1170 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
1171 boot_cpu_data.x86 == 6)
1172 reserve_bootmem(0xa0000 - 4096, 4096);
1174 #ifdef CONFIG_SMP
1175 /*
1176  * But first pinch a few for the stack/trampoline stuff
1177  * FIXME: Don't need the extra page at 4K, but need to fix
1178  * trampoline before removing it. (see the GDT stuff)
1179  */
1180 reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
1181 #endif
1182 #ifdef CONFIG_ACPI_SLEEP
1183 /*
1184  * Reserve low memory region for sleep support.
1185  */
1186 acpi_reserve_bootmem();
1187 #endif
1188 #endif /* !CONFIG_XEN */
1190 #ifdef CONFIG_BLK_DEV_INITRD
/* XEN: the domain builder loads the initrd at mod_start; it already
 * lies inside the reserved initial mapping, hence no reserve_bootmem. */
1191 if (xen_start_info.mod_start) {
1192 if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
1193 /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
1194 initrd_start = INITRD_START + PAGE_OFFSET;
1195 initrd_end = initrd_start+INITRD_SIZE;
1196 initrd_below_start_ok = 1;
1198 else {
1199 printk(KERN_ERR "initrd extends beyond end of memory "
1200 "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
1201 INITRD_START + INITRD_SIZE,
1202 max_low_pfn << PAGE_SHIFT);
1203 initrd_start = 0;
1206 #endif
/* XEN: temporary P2M table supplied by the domain builder.
 * NOTE(review): cast is to (unsigned int *) while setup_arch() later
 * copies this list with sizeof(unsigned long) strides - same width on
 * i386, but the types should agree; confirm declared type of
 * phys_to_machine_mapping. */
1208 phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
1211 /*
1212  * The node 0 pgdat is initialized before all of these because
1213  * it's needed for bootmem. node>0 pgdats have their virtual
1214  * space allocated before the pagetables are in place to access
1215  * them, so they can't be cleared then.
1217  * This should all compile down to nothing when NUMA is off.
1218  */
1219 void __init remapped_pgdat_init(void)
1221 int nid;
/* zero every non-boot node's pglist_data now that it is mapped */
1223 for_each_online_node(nid) {
1224 if (nid != 0)
1225 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
1229 /*
1230  * Request address space for all standard RAM and ROM resources
1231  * and also for regions reported as reserved by the e820.
 *
 * Creates one iomem resource per e820 entry below 4GB and nests the
 * kernel code/data resources inside whichever RAM region holds them.
1232  */
1233 static void __init
1234 legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
1236 int i;
1238 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
/* only a privileged (dom0) guest may scan physical adapter ROMs */
1239 probe_roms();
1240 #endif
1241 for (i = 0; i < e820.nr_map; i++) {
1242 struct resource *res;
/* the 32-bit resource tree cannot describe regions above 4GB */
1243 if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
1244 continue;
1245 res = alloc_bootmem_low(sizeof(struct resource));
1246 switch (e820.map[i].type) {
1247 case E820_RAM: res->name = "System RAM"; break;
1248 case E820_ACPI: res->name = "ACPI Tables"; break;
1249 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1250 default: res->name = "reserved";
1252 res->start = e820.map[i].addr;
1253 res->end = res->start + e820.map[i].size - 1;
1254 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1255 request_resource(&iomem_resource, res);
1256 if (e820.map[i].type == E820_RAM) {
1257 /*
1258  * We don't know which RAM region contains kernel data,
1259  * so we try it repeatedly and let the resource manager
1260  * test it.
1261  */
1262 request_resource(res, code_resource);
1263 request_resource(res, data_resource);
1268 /*
1269  * Request address space for all standard resources
 *
 * Also locates the largest hole below 4GB in the e820 map and uses it
 * as the starting point for dynamic PCI memory allocation
 * (pci_mem_start).
1270  */
1271 static void __init register_memory(void)
1273 unsigned long gapstart, gapsize;
1274 unsigned long long last;
1275 int i;
/* EFI supplies its own memory map; otherwise fall back to e820 */
1277 if (efi_enabled)
1278 efi_initialize_iomem_resources(&code_resource, &data_resource);
1279 else
1280 legacy_init_iomem_resources(&code_resource, &data_resource);
/* XEN: only the initial domain owns the physical VGA aperture */
1282 if (xen_start_info.flags & SIF_INITDOMAIN)
1283 /* EFI systems may still have VGA */
1284 request_resource(&iomem_resource, &video_ram_resource);
1286 /* request I/O space for devices used on all i[345]86 PCs */
1287 for (i = 0; i < STANDARD_IO_RESOURCES; i++)
1288 request_resource(&ioport_resource, &standard_io_resources[i]);
1290 /*
1291  * Search for the biggest gap in the low 32 bits of the e820
1292  * memory space.
 * Walk the map backwards from 4GB, tracking the lowest start seen so
 * far ("last"); the distance from a region's end to "last" is a gap.
1293  */
1294 last = 0x100000000ull;
1295 gapstart = 0x10000000;
1296 gapsize = 0x400000;
1297 i = e820.nr_map;
1298 while (--i >= 0) {
1299 unsigned long long start = e820.map[i].addr;
1300 unsigned long long end = start + e820.map[i].size;
1302 /*
1303  * Since "last" is at most 4GB, we know we'll
1304  * fit in 32 bits if this condition is true
1305  */
1306 if (last > end) {
1307 unsigned long gap = last - end;
1309 if (gap > gapsize) {
1310 gapsize = gap;
1311 gapstart = end;
1314 if (start < last)
1315 last = start;
1318 /*
1319  * Start allocating dynamic PCI memory a bit into the gap,
1320  * aligned up to the nearest megabyte.
1322  * Question: should we try to pad it up a bit (do something
1323  * like " + (gapsize >> 3)" in there too?). We now have the
1324  * technology.
1325  */
1326 pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
1328 printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
1329 pci_mem_start, gapstart, gapsize);
1332 /* Use inline assembly to define this because the nops are defined
1333 as inline assembly strings in the include files and we cannot
1334 get them easily into strings. */
/* Emit three byte arrays into .data, each the concatenation of the
 * 1..8-byte nop encodings for the given CPU family. */
1335 asm("\t.data\nintelnops: "
1336 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
1337 GENERIC_NOP7 GENERIC_NOP8);
1338 asm("\t.data\nk8nops: "
1339 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
1340 K8_NOP7 K8_NOP8);
1341 asm("\t.data\nk7nops: "
1342 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
1343 K7_NOP7 K7_NOP8);
/* Lookup tables: entry [k] points at a k-byte nop sequence inside the
 * corresponding array above (offsets 0, 1, 1+2, ... are the running
 * sums of the preceding sequence lengths).  Entry [0] is unused. */
1345 extern unsigned char intelnops[], k8nops[], k7nops[];
1346 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
1347 NULL,
1348 intelnops,
1349 intelnops + 1,
1350 intelnops + 1 + 2,
1351 intelnops + 1 + 2 + 3,
1352 intelnops + 1 + 2 + 3 + 4,
1353 intelnops + 1 + 2 + 3 + 4 + 5,
1354 intelnops + 1 + 2 + 3 + 4 + 5 + 6,
1355 intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1356 };
1357 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
1358 NULL,
1359 k8nops,
1360 k8nops + 1,
1361 k8nops + 1 + 2,
1362 k8nops + 1 + 2 + 3,
1363 k8nops + 1 + 2 + 3 + 4,
1364 k8nops + 1 + 2 + 3 + 4 + 5,
1365 k8nops + 1 + 2 + 3 + 4 + 5 + 6,
1366 k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1367 };
1368 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
1369 NULL,
1370 k7nops,
1371 k7nops + 1,
1372 k7nops + 1 + 2,
1373 k7nops + 1 + 2 + 3,
1374 k7nops + 1 + 2 + 3 + 4,
1375 k7nops + 1 + 2 + 3 + 4 + 5,
1376 k7nops + 1 + 2 + 3 + 4 + 5 + 6,
1377 k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
1378 };
/* CPU-feature -> nop-table map; scanned in order, -1 terminates.
 * Generic (intel) nops are the fallback when no feature matches. */
1379 static struct nop {
1380 int cpuid;
1381 unsigned char **noptable;
1382 } noptypes[] = {
1383 { X86_FEATURE_K8, k8_nops },
1384 { X86_FEATURE_K7, k7_nops },
1385 { -1, NULL }
1386 };
1388 /* Replace instructions with better alternatives for this CPU type.
1390 This runs before SMP is initialized to avoid SMP problems with
1391 self modifying code. This implies that asymmetric systems where
1392 APs have less capabilities than the boot processor are not handled.
1393 In this case boot with "noreplacement". */
/* start/end delimit an array of struct alt_instr records (the
 * .altinstructions section); each record names a CPU feature, the
 * original instruction site, and its replacement bytes. */
1394 void apply_alternatives(void *start, void *end)
1396 struct alt_instr *a;
1397 int diff, i, k;
/* default to the generic nop table; upgrade if a listed feature is set */
1398 unsigned char **noptable = intel_nops;
1399 for (i = 0; noptypes[i].cpuid >= 0; i++) {
1400 if (boot_cpu_has(noptypes[i].cpuid)) {
1401 noptable = noptypes[i].noptable;
1402 break;
1405 for (a = start; (void *)a < end; a++) {
/* skip records whose required CPU feature is absent */
1406 if (!boot_cpu_has(a->cpuid))
1407 continue;
/* a replacement longer than the patch site would corrupt code */
1408 BUG_ON(a->replacementlen > a->instrlen);
1409 memcpy(a->instr, a->replacement, a->replacementlen);
1410 diff = a->instrlen - a->replacementlen;
1411 /* Pad the rest with nops */
/* fill the leftover bytes with the longest nops available,
 * at most ASM_NOP_MAX bytes per chunk */
1412 for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
1413 k = diff;
1414 if (k > ASM_NOP_MAX)
1415 k = ASM_NOP_MAX;
1416 memcpy(a->instr + i, noptable[k], k);
/* set by the "noreplacement" boot option to disable patching */
1421 static int no_replacement __initdata = 0;
/*
 * Patch the kernel's own .altinstructions section at boot, unless
 * disabled on the command line.
 */
1423 void __init alternative_instructions(void)
1425 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
1426 if (no_replacement)
1427 return;
1428 apply_alternatives(__alt_instructions, __alt_instructions_end);
/* "noreplacement" boot-option handler */
1431 static int __init noreplacement_setup(char *s)
1433 no_replacement = 1;
1434 return 0;
1437 __setup("noreplacement", noreplacement_setup);
/* defined in the machine-specific setup header included below */
1439 static char * __init machine_specific_memory_setup(void);
1441 #ifdef CONFIG_MCA
/* record MicroChannel bus presence as reported by the BIOS */
1442 static void set_mca_bus(int x)
1444 MCA_bus = x;
1446 #else
/* no-op when MCA support is not configured */
1447 static void set_mca_bus(int x) { }
1448 #endif
1450 /*
1451  * Determine if we were loaded by an EFI loader. If so, then we have also been
1452  * passed the efi memmap, systab, etc., so we should use these data structures
1453  * for initialization. Note, the efi init code path is determined by the
1454  * global efi_enabled. This allows the same kernel image to be used on existing
1455  * systems (with a traditional BIOS) as well as on EFI systems.
 *
 * XEN: this variant additionally registers a panic notifier that
 * crashes the domain via the hypervisor, enables Xen vm-assists,
 * sizes the phys-to-machine (P2M) table to max_pfn, and publishes
 * the P2M frame list through the shared-info page.
1456  */
1457 void __init setup_arch(char **cmdline_p)
1459 int i, j;
1460 physdev_op_t op;
1461 unsigned long max_low_pfn;
1463 /* Force a quick death if the kernel panics. */
1464 extern int panic_timeout;
1465 if (panic_timeout == 0)
1466 panic_timeout = 1;
1468 /* Register a call for panic conditions. */
1469 notifier_chain_register(&panic_notifier_list, &xen_panic_block);
/* XEN: ask the hypervisor to emulate 4GB segments and to make page
 * tables writable (trap-and-emulate), both needed by this kernel */
1471 HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
1472 HYPERVISOR_vm_assist(VMASST_CMD_enable,
1473 VMASST_TYPE_writable_pagetables);
1475 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
1476 early_cpu_init();
1478 /*
1479  * FIXME: This isn't an official loader_type right
1480  * now but does currently work with elilo.
1481  * If we were configured as an EFI kernel, check to make
1482  * sure that we were loaded correctly from elilo and that
1483  * the system table is valid. If not, then initialize normally.
1484  */
1485 #ifdef CONFIG_EFI
1486 if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
1487 efi_enabled = 1;
1488 #endif
1490 /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
1491 properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
1492 */
1493 ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
/* copy boot-loader-provided parameters out of the zero page */
1494 drive_info = DRIVE_INFO;
1495 screen_info = SCREEN_INFO;
1496 edid_info = EDID_INFO;
1497 apm_info.bios = APM_BIOS_INFO;
1498 ist_info = IST_INFO;
1499 saved_videomode = VIDEO_MODE;
1500 if( SYS_DESC_TABLE.length != 0 ) {
1501 set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
1502 machine_id = SYS_DESC_TABLE.table[0];
1503 machine_submodel_id = SYS_DESC_TABLE.table[1];
1504 BIOS_revision = SYS_DESC_TABLE.table[2];
1506 bootloader_type = LOADER_TYPE;
1508 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
1509 /* This is drawn from a dump from vgacon:startup in standard Linux. */
1510 screen_info.orig_video_mode = 3;
1511 screen_info.orig_video_isVGA = 1;
1512 screen_info.orig_video_lines = 25;
1513 screen_info.orig_video_cols = 80;
1514 screen_info.orig_video_ega_bx = 3;
1515 screen_info.orig_video_points = 16;
1516 #endif
1518 #ifdef CONFIG_BLK_DEV_RAM
1519 rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
1520 rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
1521 rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
1522 #endif
1523 ARCH_SETUP
/* obtain the physical memory map (EFI memmap or e820) */
1524 if (efi_enabled)
1525 efi_init();
1526 else {
1527 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1528 print_memory_map(machine_specific_memory_setup());
1531 copy_edd();
1533 if (!MOUNT_ROOT_RDONLY)
1534 root_mountflags &= ~MS_RDONLY;
1535 init_mm.start_code = (unsigned long) _text;
1536 init_mm.end_code = (unsigned long) _etext;
1537 init_mm.end_data = (unsigned long) _edata;
/* XEN: the initial heap starts after the builder-provided page tables */
1538 init_mm.brk = (PFN_UP(__pa(xen_start_info.pt_base)) +
1539 xen_start_info.nr_pt_frames) << PAGE_SHIFT;
1541 /* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
1542 /*code_resource.start = virt_to_phys(_text);*/
1543 /*code_resource.end = virt_to_phys(_etext)-1;*/
1544 /*data_resource.start = virt_to_phys(_etext);*/
1545 /*data_resource.end = virt_to_phys(_edata)-1;*/
1547 parse_cmdline_early(cmdline_p);
1549 max_low_pfn = setup_memory();
1551 /*
1552  * NOTE: before this point _nobody_ is allowed to allocate
1553  * any memory using the bootmem allocator. Although the
1554  * allocator is now initialised only the first 8Mb of the kernel
1555  * virtual address space has been mapped. All allocations before
1556  * paging_init() has completed must use the alloc_bootmem_low_pages()
1557  * variant (which allocates DMA'able memory) and care must be taken
1558  * not to exceed the 8Mb limit.
1559  */
1561 #ifdef CONFIG_SMP
1562 smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
1563 #endif
1564 paging_init();
1565 remapped_pgdat_init();
1566 zone_sizes_init();
1568 #ifdef CONFIG_X86_FIND_SMP_CONFIG
1569 /*
1570  * Find and reserve possible boot-time SMP configuration:
1571  */
1572 find_smp_config();
1573 #endif
1575 /* Make sure we have a correctly sized P->M table. */
/* The hypervisor supplied a P2M list covering nr_pages frames; if the
 * domain's view (max_pfn) differs, reallocate to max_pfn entries. */
1576 if (max_pfn != xen_start_info.nr_pages) {
1577 phys_to_machine_mapping = alloc_bootmem_low_pages(
1578 max_pfn * sizeof(unsigned long));
1580 if (max_pfn > xen_start_info.nr_pages) {
1581 /* set to INVALID_P2M_ENTRY */
/* grow: pad the tail with all-ones (invalid) entries, then copy
 * the hypervisor-provided prefix */
1582 memset(phys_to_machine_mapping, ~0,
1583 max_pfn * sizeof(unsigned long));
1584 memcpy(phys_to_machine_mapping,
1585 (unsigned long *)xen_start_info.mfn_list,
1586 xen_start_info.nr_pages * sizeof(unsigned long));
1587 } else {
/* shrink: keep the first max_pfn entries and hand the surplus
 * machine frames back to the hypervisor */
1588 memcpy(phys_to_machine_mapping,
1589 (unsigned long *)xen_start_info.mfn_list,
1590 max_pfn * sizeof(unsigned long));
1591 if (HYPERVISOR_dom_mem_op(
1592 MEMOP_decrease_reservation,
1593 (unsigned long *)xen_start_info.mfn_list + max_pfn,
1594 xen_start_info.nr_pages - max_pfn, 0) !=
1595 (xen_start_info.nr_pages - max_pfn)) BUG();
/* the original builder-provided list is no longer needed */
1597 free_bootmem(
1598 __pa(xen_start_info.mfn_list),
1599 PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
1600 sizeof(unsigned long))));
/* Publish the P2M table to the hypervisor/tools: one MFN per page of
 * the table, collected into pfn_to_mfn_frame_list and advertised via
 * the shared-info page. */
1603 pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
1604 for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
1606 pfn_to_mfn_frame_list[j] =
1607 virt_to_mfn(&phys_to_machine_mapping[i]);
1609 HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
1610 virt_to_mfn(pfn_to_mfn_frame_list);
1612 /*
1613  * NOTE: at this point the bootmem allocator is fully available.
1614  */
1616 #ifdef CONFIG_EARLY_PRINTK
1618 char *s = strstr(*cmdline_p, "earlyprintk=");
1619 if (s) {
1620 extern void setup_early_printk(char *);
1622 setup_early_printk(s);
1623 printk("early console enabled\n");
1626 #endif
1629 dmi_scan_machine();
1631 #ifdef CONFIG_X86_GENERICARCH
1632 generic_apic_probe(*cmdline_p);
1633 #endif
1634 if (efi_enabled)
1635 efi_map_memmap();
/* XEN: request IOPL 1 so the guest kernel may use in/out instructions */
1637 op.cmd = PHYSDEVOP_SET_IOPL;
1638 op.u.set_iopl.iopl = 1;
1639 HYPERVISOR_physdev_op(&op);
1641 #ifdef CONFIG_ACPI_BOOT
/* unprivileged domains have no access to real ACPI tables */
1642 if (!(xen_start_info.flags & SIF_INITDOMAIN)) {
1643 printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
1644 acpi_disabled = 1;
1645 acpi_ht = 0;
1647 #endif
1649 #ifdef CONFIG_ACPI_BOOT
1650 /*
1651  * Parse the ACPI tables for possible boot-time SMP configuration.
1652  */
1653 acpi_boot_table_init();
1654 acpi_boot_init();
1655 #endif
1657 #ifdef CONFIG_X86_LOCAL_APIC
1658 if (smp_found_config)
1659 get_smp_config();
1660 #endif
1662 /* XXX Disable irqdebug until we have a way to avoid interrupt
1663  * conflicts. */
1664 noirqdebug_setup("");
1666 register_memory();
/* console selection: dom0 may drive real VGA; other domains get the
 * Xen null console / dummy console instead */
1668 if (xen_start_info.flags & SIF_INITDOMAIN) {
1669 if (!(xen_start_info.flags & SIF_PRIVILEGED))
1670 panic("Xen granted us console access "
1671 "but not privileged status");
1673 #ifdef CONFIG_VT
1674 #if defined(CONFIG_VGA_CONSOLE)
1675 if (!efi_enabled ||
1676 (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
1677 conswitchp = &vga_con;
1678 #elif defined(CONFIG_DUMMY_CONSOLE)
1679 conswitchp = &dummy_con;
1680 #endif
1681 #endif
1682 } else {
1683 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
1684 extern const struct consw xennull_con;
1685 extern int console_use_vt;
1686 #if defined(CONFIG_VGA_CONSOLE)
1687 /* disable VGA driver */
1688 ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
1689 #endif
1690 conswitchp = &xennull_con;
1691 console_use_vt = 0;
1692 #endif
/*
 * Panic-notifier callback (registered in setup_arch): ask the
 * hypervisor to crash this domain so the toolstack can react.
 */
1696 static int
1697 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1699 HYPERVISOR_crash();
1700 /* we're never actually going to get here... */
1701 return NOTIFY_DONE;
1704 #include "setup_arch_post.h"
1705 /*
1706 * Local Variables:
1707 * mode:c
1708 * c-file-style:"k&r"
1709 * c-basic-offset:8
1710 * End:
1711 */