ia64/xen-unstable

view linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c @ 6671:7a36f58f64ee

merge?
author kaf24@firebug.cl.cam.ac.uk
date Wed Sep 07 09:50:57 2005 +0000 (2005-09-07)
parents fb90dd31c6d7 8db9c5873b9b
children a75b08af8d19
line source
1 /*
2 * linux/arch/i386/kernel/setup.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
7 *
8 * Memory region support
9 * David Parsons <orc@pell.chi.il.us>, July-August 1999
10 *
11 * Added E820 sanitization routine (removes overlapping memory regions);
12 * Brian Moyle <bmoyle@mvista.com>, February 2001
13 *
14 * Moved CPU detection code to cpu/${cpu}.c
15 * Patrick Mochel <mochel@osdl.org>, March 2002
16 *
17 * Provisions for empty E820 memory regions (reported by certain BIOSes).
18 * Alex Achenbach <xela@slit.de>, December 2002.
19 *
20 */
22 /*
23 * This file handles the architecture-dependent parts of initialization
24 */
26 #include <linux/sched.h>
27 #include <linux/mm.h>
28 #include <linux/tty.h>
29 #include <linux/ioport.h>
30 #include <linux/acpi.h>
31 #include <linux/apm_bios.h>
32 #include <linux/initrd.h>
33 #include <linux/bootmem.h>
34 #include <linux/seq_file.h>
35 #include <linux/console.h>
36 #include <linux/mca.h>
37 #include <linux/root_dev.h>
38 #include <linux/highmem.h>
39 #include <linux/module.h>
40 #include <linux/efi.h>
41 #include <linux/init.h>
42 #include <linux/edd.h>
43 #include <linux/nodemask.h>
44 #include <linux/kernel.h>
45 #include <linux/percpu.h>
46 #include <linux/notifier.h>
47 #include <video/edid.h>
48 #include <asm/e820.h>
49 #include <asm/mpspec.h>
50 #include <asm/setup.h>
51 #include <asm/arch_hooks.h>
52 #include <asm/sections.h>
53 #include <asm/io_apic.h>
54 #include <asm/ist.h>
55 #include <asm/io.h>
56 #include <asm-xen/hypervisor.h>
57 #include <asm-xen/xen-public/physdev.h>
58 #include <asm-xen/xen-public/memory.h>
59 #include "setup_arch_pre.h"
60 #include <bios_ebda.h>
62 /* Allows setting of maximum possible memory size */
63 static unsigned long xen_override_max_pfn;
65 static int xen_panic_event(struct notifier_block *, unsigned long, void *);
66 static struct notifier_block xen_panic_block = {
67 xen_panic_event, NULL, 0 /* try to go last */
68 };
70 int disable_pse __initdata = 0;
72 /*
73 * Machine setup..
74 */
76 #ifdef CONFIG_EFI
77 int efi_enabled = 0;
78 EXPORT_SYMBOL(efi_enabled);
79 #endif
81 /* cpu data as detected by the assembly code in head.S */
82 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
83 /* common cpu data for all cpus */
84 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
86 unsigned long mmu_cr4_features;
88 #ifdef CONFIG_ACPI_INTERPRETER
89 int acpi_disabled = 0;
90 #else
91 int acpi_disabled = 1;
92 #endif
93 EXPORT_SYMBOL(acpi_disabled);
95 #ifdef CONFIG_ACPI_BOOT
96 int __initdata acpi_force = 0;
97 extern acpi_interrupt_flags acpi_sci_flags;
98 #endif
100 /* for MCA, but anyone else can use it if they want */
101 unsigned int machine_id;
102 unsigned int machine_submodel_id;
103 unsigned int BIOS_revision;
104 unsigned int mca_pentium_flag;
106 /* For PCI or other memory-mapped resources */
107 unsigned long pci_mem_start = 0x10000000;
109 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
110 int bootloader_type;
112 /* user-defined highmem size */
113 static unsigned int highmem_pages = -1;
115 /*
116 * Setup options
117 */
118 struct drive_info_struct { char dummy[32]; } drive_info;
119 struct screen_info screen_info;
120 struct apm_info apm_info;
121 struct sys_desc_table_struct {
122 unsigned short length;
123 unsigned char table[0];
124 };
125 struct edid_info edid_info;
126 struct ist_info ist_info;
127 struct e820map e820;
129 extern void early_cpu_init(void);
130 extern void dmi_scan_machine(void);
131 extern void generic_apic_probe(char *);
132 extern int root_mountflags;
134 unsigned long saved_videomode;
136 #define RAMDISK_IMAGE_START_MASK 0x07FF
137 #define RAMDISK_PROMPT_FLAG 0x8000
138 #define RAMDISK_LOAD_FLAG 0x4000
140 static char command_line[COMMAND_LINE_SIZE];
142 unsigned char __initdata boot_params[PARAM_SIZE];
144 static struct resource data_resource = {
145 .name = "Kernel data",
146 .start = 0,
147 .end = 0,
148 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
149 };
151 static struct resource code_resource = {
152 .name = "Kernel code",
153 .start = 0,
154 .end = 0,
155 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
156 };
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
/* Legacy PC ROM regions; only probed/claimed when running as dom0. */
static struct resource system_rom_resource = {
	.name	= "System ROM",
	.start	= 0xf0000,
	.end	= 0xfffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

static struct resource extension_rom_resource = {
	.name	= "Extension ROM",
	.start	= 0xe0000,
	.end	= 0xeffff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};

/* Up to six adapter ROMs; entries past the first get ranges from probing. */
static struct resource adapter_rom_resources[] = { {
	.name	= "Adapter ROM",
	.start	= 0xc8000,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
}, {
	.name	= "Adapter ROM",
	.start	= 0,
	.end	= 0,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
} };

#define ADAPTER_ROM_RESOURCES \
	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])

static struct resource video_rom_resource = {
	.name	= "Video ROM",
	.start	= 0xc0000,
	.end	= 0xc7fff,
	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
};
#endif
216 static struct resource video_ram_resource = {
217 .name = "Video RAM area",
218 .start = 0xa0000,
219 .end = 0xbffff,
220 .flags = IORESOURCE_BUSY | IORESOURCE_MEM
221 };
223 static struct resource standard_io_resources[] = { {
224 .name = "dma1",
225 .start = 0x0000,
226 .end = 0x001f,
227 .flags = IORESOURCE_BUSY | IORESOURCE_IO
228 }, {
229 .name = "pic1",
230 .start = 0x0020,
231 .end = 0x0021,
232 .flags = IORESOURCE_BUSY | IORESOURCE_IO
233 }, {
234 .name = "timer0",
235 .start = 0x0040,
236 .end = 0x0043,
237 .flags = IORESOURCE_BUSY | IORESOURCE_IO
238 }, {
239 .name = "timer1",
240 .start = 0x0050,
241 .end = 0x0053,
242 .flags = IORESOURCE_BUSY | IORESOURCE_IO
243 }, {
244 .name = "keyboard",
245 .start = 0x0060,
246 .end = 0x006f,
247 .flags = IORESOURCE_BUSY | IORESOURCE_IO
248 }, {
249 .name = "dma page reg",
250 .start = 0x0080,
251 .end = 0x008f,
252 .flags = IORESOURCE_BUSY | IORESOURCE_IO
253 }, {
254 .name = "pic2",
255 .start = 0x00a0,
256 .end = 0x00a1,
257 .flags = IORESOURCE_BUSY | IORESOURCE_IO
258 }, {
259 .name = "dma2",
260 .start = 0x00c0,
261 .end = 0x00df,
262 .flags = IORESOURCE_BUSY | IORESOURCE_IO
263 }, {
264 .name = "fpu",
265 .start = 0x00f0,
266 .end = 0x00ff,
267 .flags = IORESOURCE_BUSY | IORESOURCE_IO
268 } };
270 #define STANDARD_IO_RESOURCES \
271 (sizeof standard_io_resources / sizeof standard_io_resources[0])
273 #ifdef CONFIG_XEN_PRIVILEGED_GUEST
274 #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
276 static int __init romchecksum(unsigned char *rom, unsigned long length)
277 {
278 unsigned char *p, sum = 0;
280 for (p = rom; p < rom + length; p++)
281 sum += *p;
282 return sum == 0;
283 }
285 static void __init probe_roms(void)
286 {
287 unsigned long start, length, upper;
288 unsigned char *rom;
289 int i;
291 /* Nothing to do if not running in dom0. */
292 if (!(xen_start_info->flags & SIF_INITDOMAIN))
293 return;
295 /* video rom */
296 upper = adapter_rom_resources[0].start;
297 for (start = video_rom_resource.start; start < upper; start += 2048) {
298 rom = isa_bus_to_virt(start);
299 if (!romsignature(rom))
300 continue;
302 video_rom_resource.start = start;
304 /* 0 < length <= 0x7f * 512, historically */
305 length = rom[2] * 512;
307 /* if checksum okay, trust length byte */
308 if (length && romchecksum(rom, length))
309 video_rom_resource.end = start + length - 1;
311 request_resource(&iomem_resource, &video_rom_resource);
312 break;
313 }
315 start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
316 if (start < upper)
317 start = upper;
319 /* system rom */
320 request_resource(&iomem_resource, &system_rom_resource);
321 upper = system_rom_resource.start;
323 /* check for extension rom (ignore length byte!) */
324 rom = isa_bus_to_virt(extension_rom_resource.start);
325 if (romsignature(rom)) {
326 length = extension_rom_resource.end - extension_rom_resource.start + 1;
327 if (romchecksum(rom, length)) {
328 request_resource(&iomem_resource, &extension_rom_resource);
329 upper = extension_rom_resource.start;
330 }
331 }
333 /* check for adapter roms on 2k boundaries */
334 for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
335 rom = isa_bus_to_virt(start);
336 if (!romsignature(rom))
337 continue;
339 /* 0 < length <= 0x7f * 512, historically */
340 length = rom[2] * 512;
342 /* but accept any length that fits if checksum okay */
343 if (!length || start + length > upper || !romchecksum(rom, length))
344 continue;
346 adapter_rom_resources[i].start = start;
347 adapter_rom_resources[i].end = start + length - 1;
348 request_resource(&iomem_resource, &adapter_rom_resources[i]);
350 start = adapter_rom_resources[i++].end & ~2047UL;
351 }
352 }
353 #endif
355 /*
356 * Point at the empty zero page to start with. We map the real shared_info
357 * page as soon as fixmap is up and running.
358 */
359 shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
360 EXPORT_SYMBOL(HYPERVISOR_shared_info);
362 unsigned long *phys_to_machine_mapping;
363 unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
364 EXPORT_SYMBOL(phys_to_machine_mapping);
366 /* Raw start-of-day parameters from the hypervisor. */
367 start_info_t *xen_start_info;
369 static void __init limit_regions(unsigned long long size)
370 {
371 unsigned long long current_addr = 0;
372 int i;
374 if (efi_enabled) {
375 for (i = 0; i < memmap.nr_map; i++) {
376 current_addr = memmap.map[i].phys_addr +
377 (memmap.map[i].num_pages << 12);
378 if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) {
379 if (current_addr >= size) {
380 memmap.map[i].num_pages -=
381 (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
382 memmap.nr_map = i + 1;
383 return;
384 }
385 }
386 }
387 }
388 for (i = 0; i < e820.nr_map; i++) {
389 if (e820.map[i].type == E820_RAM) {
390 current_addr = e820.map[i].addr + e820.map[i].size;
391 if (current_addr >= size) {
392 e820.map[i].size -= current_addr-size;
393 e820.nr_map = i + 1;
394 return;
395 }
396 }
397 }
398 }
400 static void __init add_memory_region(unsigned long long start,
401 unsigned long long size, int type)
402 {
403 int x;
405 if (!efi_enabled) {
406 x = e820.nr_map;
408 if (x == E820MAX) {
409 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
410 return;
411 }
413 e820.map[x].addr = start;
414 e820.map[x].size = size;
415 e820.map[x].type = type;
416 e820.nr_map++;
417 }
418 } /* add_memory_region */
420 #define E820_DEBUG 1
422 static void __init print_memory_map(char *who)
423 {
424 int i;
426 for (i = 0; i < e820.nr_map; i++) {
427 printk(" %s: %016Lx - %016Lx ", who,
428 e820.map[i].addr,
429 e820.map[i].addr + e820.map[i].size);
430 switch (e820.map[i].type) {
431 case E820_RAM: printk("(usable)\n");
432 break;
433 case E820_RESERVED:
434 printk("(reserved)\n");
435 break;
436 case E820_ACPI:
437 printk("(ACPI data)\n");
438 break;
439 case E820_NVS:
440 printk("(ACPI NVS)\n");
441 break;
442 default: printk("type %lu\n", e820.map[i].type);
443 break;
444 }
445 }
446 }
#if 0
/*
 * NOTE: this whole section is compiled out under Xen - the hypervisor
 * supplies the memory map, so the native e820 sanitizer and copier are
 * unused here.  Kept for reference against the native i386 setup.c.
 *
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries.  The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
struct change_member {
	struct e820entry *pbios; /* pointer to original bios entry */
	unsigned long long addr; /* address for this change point */
};
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;

static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
		Visually we're performing the following (1,2,3,4 = memory types)...

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++)	{
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;    	/* true number of change-points */

	/* sort change-point list by memory addresses (low -> high) */
	still_changing = 1;
	while (still_changing)	{
		still_changing = 0;
		for (i=1; i < chg_nr; i++)  {
			/* if <current_addr> > <last_addr>, swap */
			/* or, if current=<start_addr> & last=<end_addr>, swap */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0;	 /* number of entries in the overlap table */
	new_bios_entry=0;	 /* index for creating new bios map entries */
	last_type = 0;		 /* start with undefined memory type */
	last_addr = 0;		 /* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++)
	{
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
		{
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		}
		else
		{
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++)
			{
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type)	{
			if (last_type != 0)	 {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break; 	/* no more space left for new bios entries */
			}
			if (current_type != 0)	{
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry;   /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}

/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 *
 * We check to see that the memory map contains at least 2 elements
 * before we'll use it, because the detection code in setup.S may
 * not be perfect and most every PC known to man has two memory
 * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
 * thinkpad 560x, for example, does not cooperate with the memory
 * detection code.)
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM) {
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				if (end <= 0x100000ULL)
					continue;
				start = 0x100000ULL;
				size = end - start;
			}
		}
		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);
	return 0;
}
#endif
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif
/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 */
static inline void copy_edd(void)
{
	memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature));
	memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info));
	edd.mbr_signature_nr = EDD_MBR_SIG_NR;
	edd.edd_info_nr = EDD_NR;
}
#else
/* EDD support compiled out: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
693 /*
694 * Do NOT EVER look at the BIOS memory size location.
695 * It does not work on many machines.
696 */
697 #define LOWMEMSIZE() (0x9f000)
699 static void __init parse_cmdline_early (char ** cmdline_p)
700 {
701 char c = ' ', *to = command_line, *from = saved_command_line;
702 int len = 0, max_cmdline;
703 int userdef = 0;
705 if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
706 max_cmdline = COMMAND_LINE_SIZE;
707 memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
708 /* Save unparsed command line copy for /proc/cmdline */
709 saved_command_line[max_cmdline-1] = '\0';
711 for (;;) {
712 if (c != ' ')
713 goto next_char;
714 /*
715 * "mem=nopentium" disables the 4MB page tables.
716 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
717 * to <mem>, overriding the bios size.
718 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
719 * <start> to <start>+<mem>, overriding the bios size.
720 *
721 * HPA tells me bootloaders need to parse mem=, so no new
722 * option should be mem= [also see Documentation/i386/boot.txt]
723 */
724 if (!memcmp(from, "mem=", 4)) {
725 if (to != command_line)
726 to--;
727 if (!memcmp(from+4, "nopentium", 9)) {
728 from += 9+4;
729 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
730 disable_pse = 1;
731 } else {
732 /* If the user specifies memory size, we
733 * limit the BIOS-provided memory map to
734 * that size. exactmap can be used to specify
735 * the exact map. mem=number can be used to
736 * trim the existing memory map.
737 */
738 unsigned long long mem_size;
740 mem_size = memparse(from+4, &from);
741 #if 0
742 limit_regions(mem_size);
743 userdef=1;
744 #else
745 xen_override_max_pfn =
746 (unsigned long)(mem_size>>PAGE_SHIFT);
747 #endif
748 }
749 }
751 else if (!memcmp(from, "memmap=", 7)) {
752 if (to != command_line)
753 to--;
754 if (!memcmp(from+7, "exactmap", 8)) {
755 from += 8+7;
756 e820.nr_map = 0;
757 userdef = 1;
758 } else {
759 /* If the user specifies memory size, we
760 * limit the BIOS-provided memory map to
761 * that size. exactmap can be used to specify
762 * the exact map. mem=number can be used to
763 * trim the existing memory map.
764 */
765 unsigned long long start_at, mem_size;
767 mem_size = memparse(from+7, &from);
768 if (*from == '@') {
769 start_at = memparse(from+1, &from);
770 add_memory_region(start_at, mem_size, E820_RAM);
771 } else if (*from == '#') {
772 start_at = memparse(from+1, &from);
773 add_memory_region(start_at, mem_size, E820_ACPI);
774 } else if (*from == '$') {
775 start_at = memparse(from+1, &from);
776 add_memory_region(start_at, mem_size, E820_RESERVED);
777 } else {
778 limit_regions(mem_size);
779 userdef=1;
780 }
781 }
782 }
784 else if (!memcmp(from, "noexec=", 7))
785 noexec_setup(from + 7);
788 #ifdef CONFIG_X86_MPPARSE
789 /*
790 * If the BIOS enumerates physical processors before logical,
791 * maxcpus=N at enumeration-time can be used to disable HT.
792 */
793 else if (!memcmp(from, "maxcpus=", 8)) {
794 extern unsigned int maxcpus;
796 maxcpus = simple_strtoul(from + 8, NULL, 0);
797 }
798 #endif
800 #ifdef CONFIG_ACPI_BOOT
801 /* "acpi=off" disables both ACPI table parsing and interpreter */
802 else if (!memcmp(from, "acpi=off", 8)) {
803 disable_acpi();
804 }
806 /* acpi=force to over-ride black-list */
807 else if (!memcmp(from, "acpi=force", 10)) {
808 acpi_force = 1;
809 acpi_ht = 1;
810 acpi_disabled = 0;
811 }
813 /* acpi=strict disables out-of-spec workarounds */
814 else if (!memcmp(from, "acpi=strict", 11)) {
815 acpi_strict = 1;
816 }
818 /* Limit ACPI just to boot-time to enable HT */
819 else if (!memcmp(from, "acpi=ht", 7)) {
820 if (!acpi_force)
821 disable_acpi();
822 acpi_ht = 1;
823 }
825 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
826 else if (!memcmp(from, "pci=noacpi", 10)) {
827 acpi_disable_pci();
828 }
829 /* "acpi=noirq" disables ACPI interrupt routing */
830 else if (!memcmp(from, "acpi=noirq", 10)) {
831 acpi_noirq_set();
832 }
834 else if (!memcmp(from, "acpi_sci=edge", 13))
835 acpi_sci_flags.trigger = 1;
837 else if (!memcmp(from, "acpi_sci=level", 14))
838 acpi_sci_flags.trigger = 3;
840 else if (!memcmp(from, "acpi_sci=high", 13))
841 acpi_sci_flags.polarity = 1;
843 else if (!memcmp(from, "acpi_sci=low", 12))
844 acpi_sci_flags.polarity = 3;
846 #ifdef CONFIG_X86_IO_APIC
847 else if (!memcmp(from, "acpi_skip_timer_override", 24))
848 acpi_skip_timer_override = 1;
849 #endif
851 #ifdef CONFIG_X86_LOCAL_APIC
852 /* disable IO-APIC */
853 else if (!memcmp(from, "noapic", 6))
854 disable_ioapic_setup();
855 #endif /* CONFIG_X86_LOCAL_APIC */
856 #endif /* CONFIG_ACPI_BOOT */
858 /*
859 * highmem=size forces highmem to be exactly 'size' bytes.
860 * This works even on boxes that have no highmem otherwise.
861 * This also works to reduce highmem size on bigger boxes.
862 */
863 else if (!memcmp(from, "highmem=", 8))
864 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
866 /*
867 * vmalloc=size forces the vmalloc area to be exactly 'size'
868 * bytes. This can be used to increase (or decrease) the
869 * vmalloc area - the default is 128m.
870 */
871 else if (!memcmp(from, "vmalloc=", 8))
872 __VMALLOC_RESERVE = memparse(from+8, &from);
874 next_char:
875 c = *(from++);
876 if (!c)
877 break;
878 if (COMMAND_LINE_SIZE <= ++len)
879 break;
880 *(to++) = c;
881 }
882 *to = '\0';
883 *cmdline_p = command_line;
884 if (userdef) {
885 printk(KERN_INFO "user-defined physical RAM map:\n");
886 print_memory_map("user");
887 }
888 }
890 #if 0 /* !XEN */
891 /*
892 * Callback for efi_memory_walk.
893 */
894 static int __init
895 efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
896 {
897 unsigned long *max_pfn = arg, pfn;
899 if (start < end) {
900 pfn = PFN_UP(end -1);
901 if (pfn > *max_pfn)
902 *max_pfn = pfn;
903 }
904 return 0;
905 }
908 /*
909 * Find the highest page frame number we have available
910 */
911 void __init find_max_pfn(void)
912 {
913 int i;
915 max_pfn = 0;
916 if (efi_enabled) {
917 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
918 return;
919 }
921 for (i = 0; i < e820.nr_map; i++) {
922 unsigned long start, end;
923 /* RAM? */
924 if (e820.map[i].type != E820_RAM)
925 continue;
926 start = PFN_UP(e820.map[i].addr);
927 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
928 if (start >= end)
929 continue;
930 if (end > max_pfn)
931 max_pfn = end;
932 }
933 }
934 #else
935 /* We don't use the fake e820 because we need to respond to user override. */
936 void __init find_max_pfn(void)
937 {
938 if ( xen_override_max_pfn < xen_start_info->nr_pages )
939 xen_override_max_pfn = xen_start_info->nr_pages;
940 max_pfn = xen_override_max_pfn;
941 }
942 #endif /* XEN */
944 /*
945 * Determine low and high memory ranges:
946 */
947 unsigned long __init find_max_low_pfn(void)
948 {
949 unsigned long max_low_pfn;
951 max_low_pfn = max_pfn;
952 if (max_low_pfn > MAXMEM_PFN) {
953 if (highmem_pages == -1)
954 highmem_pages = max_pfn - MAXMEM_PFN;
955 if (highmem_pages + MAXMEM_PFN < max_pfn)
956 max_pfn = MAXMEM_PFN + highmem_pages;
957 if (highmem_pages + MAXMEM_PFN > max_pfn) {
958 printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
959 highmem_pages = 0;
960 }
961 max_low_pfn = MAXMEM_PFN;
962 #ifndef CONFIG_HIGHMEM
963 /* Maximum memory usable is what is directly addressable */
964 printk(KERN_WARNING "Warning only %ldMB will be used.\n",
965 MAXMEM>>20);
966 if (max_pfn > MAX_NONPAE_PFN)
967 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
968 else
969 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
970 max_pfn = MAXMEM_PFN;
971 #else /* !CONFIG_HIGHMEM */
972 #ifndef CONFIG_X86_PAE
973 if (max_pfn > MAX_NONPAE_PFN) {
974 max_pfn = MAX_NONPAE_PFN;
975 printk(KERN_WARNING "Warning only 4GB will be used.\n");
976 printk(KERN_WARNING "Use a PAE enabled kernel.\n");
977 }
978 #endif /* !CONFIG_X86_PAE */
979 #endif /* !CONFIG_HIGHMEM */
980 } else {
981 if (highmem_pages == -1)
982 highmem_pages = 0;
983 #ifdef CONFIG_HIGHMEM
984 if (highmem_pages >= max_pfn) {
985 printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
986 highmem_pages = 0;
987 }
988 if (highmem_pages) {
989 if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
990 printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
991 highmem_pages = 0;
992 }
993 max_low_pfn -= highmem_pages;
994 }
995 #else
996 if (highmem_pages)
997 printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
998 #endif
999 }
1000 return max_low_pfn;
1003 /*
1004 * Free all available memory for boot time allocation. Used
1005 * as a callback function by efi_memory_walk()
1006 */
1008 static int __init
1009 free_available_memory(unsigned long start, unsigned long end, void *arg)
1011 /* check max_low_pfn */
1012 if (start >= ((max_low_pfn + 1) << PAGE_SHIFT))
1013 return 0;
1014 if (end >= ((max_low_pfn + 1) << PAGE_SHIFT))
1015 end = (max_low_pfn + 1) << PAGE_SHIFT;
1016 if (start < end)
1017 free_bootmem(start, end - start);
1019 return 0;
1021 /*
1022 * Register fully available low RAM pages with the bootmem allocator.
1023 */
1024 static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
1026 int i;
1028 if (efi_enabled) {
1029 efi_memmap_walk(free_available_memory, NULL);
1030 return;
1032 for (i = 0; i < e820.nr_map; i++) {
1033 unsigned long curr_pfn, last_pfn, size;
1034 /*
1035 * Reserve usable low memory
1036 */
1037 if (e820.map[i].type != E820_RAM)
1038 continue;
1039 /*
1040 * We are rounding up the start address of usable memory:
1041 */
1042 curr_pfn = PFN_UP(e820.map[i].addr);
1043 if (curr_pfn >= max_low_pfn)
1044 continue;
1045 /*
1046 * ... and at the end of the usable range downwards:
1047 */
1048 last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
1050 if (last_pfn > max_low_pfn)
1051 last_pfn = max_low_pfn;
1053 /*
1054 * .. finally, did all the rounding and playing
1055 * around just make the area go away?
1056 */
1057 if (last_pfn <= curr_pfn)
1058 continue;
1060 size = last_pfn - curr_pfn;
1061 free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
1065 #ifndef CONFIG_XEN
1066 /*
1067 * workaround for Dell systems that neglect to reserve EBDA
1068 */
1069 static void __init reserve_ebda_region(void)
1071 unsigned int addr;
1072 addr = get_bios_ebda();
1073 if (addr)
1074 reserve_bootmem(addr, PAGE_SIZE);
1076 #endif
1078 #ifndef CONFIG_DISCONTIGMEM
1079 void __init setup_bootmem_allocator(void);
1080 static unsigned long __init setup_memory(void)
1082 /*
1083 * partially used pages are not usable - thus
1084 * we are rounding upwards:
1085 */
1086 min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
1087 xen_start_info->nr_pt_frames;
1089 find_max_pfn();
1091 max_low_pfn = find_max_low_pfn();
1093 #ifdef CONFIG_HIGHMEM
1094 highstart_pfn = highend_pfn = max_pfn;
1095 if (max_pfn > max_low_pfn) {
1096 highstart_pfn = max_low_pfn;
1098 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1099 pages_to_mb(highend_pfn - highstart_pfn));
1100 #endif
1101 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1102 pages_to_mb(max_low_pfn));
1104 setup_bootmem_allocator();
1106 return max_low_pfn;
/*
 * Populate the per-zone page counts and hand them to free_area_init().
 * Note: max_dma and low are both set to max_low_pfn below, so the
 * "low < max_dma" branch can never be taken here; all lowmem ends up
 * in ZONE_DMA and ZONE_NORMAL stays empty by design.
 */
void __init zone_sizes_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma, low;

	/*
	 * XEN: Our notion of "DMA memory" is fake when running over Xen.
	 * We simply put all RAM in the DMA zone so that those drivers which
	 * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
	 * Those drivers that *do* require lowmem are screwed anyway when
	 * running over Xen!
	 */
	max_dma = max_low_pfn;
	low = max_low_pfn;

	if (low < max_dma)
		zones_size[ZONE_DMA] = low;
	else {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
#endif
	}
	free_area_init(zones_size);
}
#else
extern unsigned long setup_memory(void);
extern void zone_sizes_init(void);
#endif /* !CONFIG_DISCONTIGMEM */
/*
 * Bring up the bootmem allocator and carve out every early reservation:
 * the bootmem bitmap itself, BIOS/EBDA/trampoline/ACPI regions (native
 * only), the initrd, and finally publish the Xen-provided P->M table.
 */
void __init setup_bootmem_allocator(void)
{
	unsigned long bootmap_size;
	/*
	 * Initialize the boot-time allocator (with low memory only):
	 */
	bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);

	register_bootmem_low_pages(max_low_pfn);

	/*
	 * Reserve the bootmem bitmap itself as well. We do this in two
	 * steps (first step was init_bootmem()) because this catches
	 * the (very unlikely) case of us accidentally initializing the
	 * bootmem allocator with an invalid RAM area.
	 */
	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
			 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));

#ifndef CONFIG_XEN
	/*
	 * reserve physical page 0 - it's a special BIOS page on many boxes,
	 * enabling clean reboots, SMP operation, laptop functions.
	 */
	reserve_bootmem(0, PAGE_SIZE);

	/* reserve EBDA region, it's a 4K region */
	reserve_ebda_region();

	/* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
	   PCI prefetch into it (errata #56). Usually the page is reserved anyways,
	   unless you have no PS/2 mouse plugged in. */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 == 6)
		reserve_bootmem(0xa0000 - 4096, 4096);

#ifdef CONFIG_SMP
	/*
	 * But first pinch a few for the stack/trampoline stuff
	 * FIXME: Don't need the extra page at 4K, but need to fix
	 * trampoline before removing it. (see the GDT stuff)
	 */
	reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
	/*
	 * Reserve low memory region for sleep support.
	 */
	acpi_reserve_bootmem();
#endif
#endif /* !CONFIG_XEN */

#ifdef CONFIG_BLK_DEV_INITRD
	if (xen_start_info->mod_start) {
		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
			/* XEN: the domain builder already placed the initrd
			 * above the kernel, so no bootmem reservation needed. */
			/*reserve_bootmem(INITRD_START, INITRD_SIZE);*/
			initrd_start = INITRD_START + PAGE_OFFSET;
			initrd_end = initrd_start+INITRD_SIZE;
			initrd_below_start_ok = 1;
		}
		else {
			printk(KERN_ERR "initrd extends beyond end of memory "
			    "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
			    INITRD_START + INITRD_SIZE,
			    max_low_pfn << PAGE_SHIFT);
			initrd_start = 0;
		}
	}
#endif

	/* Point the phys->machine frame map at the list Xen handed us in
	 * the start_info page; setup_arch() may resize it later. */
	phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
}
1213 /*
1214 * The node 0 pgdat is initialized before all of these because
1215 * it's needed for bootmem. node>0 pgdats have their virtual
1216 * space allocated before the pagetables are in place to access
1217 * them, so they can't be cleared then.
1219 * This should all compile down to nothing when NUMA is off.
1220 */
1221 void __init remapped_pgdat_init(void)
1223 int nid;
1225 for_each_online_node(nid) {
1226 if (nid != 0)
1227 memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
/*
 * Request address space for all standard RAM and ROM resources
 * and also for regions reported as reserved by the e820.
 *
 * XEN build: asks the hypervisor for the real physical memory map
 * (dom0 only) and registers those regions; also records the largest
 * gap below 4GB for later PCI resource placement.
 * Native build: walks the e820 map instead.
 */
static void __init
legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
{
	int i;
#ifdef CONFIG_XEN
	dom0_op_t op;
	struct dom0_memory_map_entry *map;
	unsigned long gapstart, gapsize;
	unsigned long long last;
#endif

#ifdef CONFIG_XEN_PRIVILEGED_GUEST
	probe_roms();
#endif

#ifdef CONFIG_XEN
	/* Fetch the machine memory map from the hypervisor into a
	 * temporary page (freed again below). */
	map = alloc_bootmem_low_pages(PAGE_SIZE);
	op.cmd = DOM0_PHYSICAL_MEMORY_MAP;
	op.u.physical_memory_map.memory_map = map;
	op.u.physical_memory_map.max_map_entries =
		PAGE_SIZE / sizeof(struct dom0_memory_map_entry);
	BUG_ON(HYPERVISOR_dom0_op(&op));

	/* Defaults: gap at 256MB, at least 4MB wide. */
	last = 0x100000000ULL;
	gapstart = 0x10000000;
	gapsize = 0x400000;

	/* Walk the map top-down, tracking the biggest hole below 4GB. */
	for (i = op.u.physical_memory_map.nr_map_entries - 1; i >= 0; i--) {
		struct resource *res;

		if ((last > map[i].end) && ((last - map[i].end) > gapsize)) {
			gapsize = last - map[i].end;
			gapstart = map[i].end;
		}
		if (map[i].start < last)
			last = map[i].start;

		/* Resources above 4GB can't be represented here. */
		if (map[i].end > 0x100000000ULL)
			continue;
		res = alloc_bootmem_low(sizeof(struct resource));
		res->name = map[i].is_ram ? "System RAM" : "reserved";
		res->start = map[i].start;
		res->end = map[i].end - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);
	}

	free_bootmem(__pa(map), PAGE_SIZE);

	/*
	 * Start allocating dynamic PCI memory a bit into the gap,
	 * aligned up to the nearest megabyte.
	 *
	 * Question: should we try to pad it up a bit (do something
	 * like " + (gapsize >> 3)" in there too?). We now have the
	 * technology.
	 */
	pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;

	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
		pci_mem_start, gapstart, gapsize);
#else
	for (i = 0; i < e820.nr_map; i++) {
		struct resource *res;
		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
			continue;
		res = alloc_bootmem_low(sizeof(struct resource));
		switch (e820.map[i].type) {
		case E820_RAM:	res->name = "System RAM"; break;
		case E820_ACPI:	res->name = "ACPI Tables"; break;
		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
		default:	res->name = "reserved";
		}
		res->start = e820.map[i].addr;
		res->end = res->start + e820.map[i].size - 1;
		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
		request_resource(&iomem_resource, res);
		if (e820.map[i].type == E820_RAM) {
			/*
			 * We don't know which RAM region contains kernel data,
			 * so we try it repeatedly and let the resource manager
			 * test it.
			 */
			request_resource(res, code_resource);
			request_resource(res, data_resource);
		}
	}
#endif
}
/*
 * Request address space for all standard resources.
 * Only meaningful in dom0; unprivileged domains return immediately.
 */
static void __init register_memory(void)
{
#ifndef CONFIG_XEN
	unsigned long gapstart, gapsize;
	unsigned long long last;
#endif
	int i;

	/* Nothing to do if not running in dom0. */
	if (!(xen_start_info->flags & SIF_INITDOMAIN))
		return;

	if (efi_enabled)
		efi_initialize_iomem_resources(&code_resource, &data_resource);
	else
		legacy_init_iomem_resources(&code_resource, &data_resource);

	/* EFI systems may still have VGA */
	request_resource(&iomem_resource, &video_ram_resource);

	/* request I/O space for devices used on all i[345]86 PCs */
	for (i = 0; i < STANDARD_IO_RESOURCES; i++)
		request_resource(&ioport_resource, &standard_io_resources[i]);

#ifndef CONFIG_XEN
	/*
	 * Search for the biggest gap in the low 32 bits of the e820
	 * memory space.  (On Xen this was already done inside
	 * legacy_init_iomem_resources() from the hypervisor's map.)
	 */
	last = 0x100000000ull;
	gapstart = 0x10000000;
	gapsize = 0x400000;
	i = e820.nr_map;
	while (--i >= 0) {
		unsigned long long start = e820.map[i].addr;
		unsigned long long end = start + e820.map[i].size;

		/*
		 * Since "last" is at most 4GB, we know we'll
		 * fit in 32 bits if this condition is true
		 */
		if (last > end) {
			unsigned long gap = last - end;

			if (gap > gapsize) {
				gapsize = gap;
				gapstart = end;
			}
		}
		if (start < last)
			last = start;
	}

	/*
	 * Start allocating dynamic PCI memory a bit into the gap,
	 * aligned up to the nearest megabyte.
	 *
	 * Question: should we try to pad it up a bit (do something
	 * like " + (gapsize >> 3)" in there too?). We now have the
	 * technology.
	 */
	pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;

	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
		pci_mem_start, gapstart, gapsize);
#endif
}
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
/* Each asm emits the 1- through 8-byte nop sequences of one CPU family
   back-to-back into .data; the *_nops pointer tables below index into
   these symbols by nop length. */
asm("\t.data\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8);
asm("\t.data\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8);
asm("\t.data\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8);
extern unsigned char intelnops[], k8nops[], k7nops[];

/* intel_nops[k] points at a k-byte nop sequence (k = 1..ASM_NOP_MAX);
   index 0 is unused. */
static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
/* Same layout for the AMD K8 nop sequences. */
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
/* Same layout for the AMD K7 nop sequences. */
static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
/* Maps a CPU feature bit to its preferred nop table; scanned in order
   by apply_alternatives() and terminated by cpuid == -1 (which falls
   back to intel_nops). */
static struct nop {
	int cpuid;
	unsigned char **noptable;
} noptypes[] = {
	{ X86_FEATURE_K8, k8_nops },
	{ X86_FEATURE_K7, k7_nops },
	{ -1, NULL }
};
/* Replace instructions with better alternatives for this CPU type.

   This runs before SMP is initialized to avoid SMP problems with
   self modifying code. This implies that asymmetric systems where
   APs have less capabilities than the boot processor are not handled.
   In this case boot with "noreplacement". */
void apply_alternatives(void *start, void *end)
{
	struct alt_instr *a;
	int diff, i, k;
	/* Default to the generic (Intel) nop table; pick the first
	 * entry in noptypes[] whose feature bit this CPU has. */
	unsigned char **noptable = intel_nops;
	for (i = 0; noptypes[i].cpuid >= 0; i++) {
		if (boot_cpu_has(noptypes[i].cpuid)) {
			noptable = noptypes[i].noptable;
			break;
		}
	}
	/* Walk the alt_instr records in [start, end); patch each site
	 * whose required CPU feature is present. */
	for (a = start; (void *)a < end; a++) {
		if (!boot_cpu_has(a->cpuid))
			continue;
		BUG_ON(a->replacementlen > a->instrlen);
		memcpy(a->instr, a->replacement, a->replacementlen);
		diff = a->instrlen - a->replacementlen;
		/* Pad the rest with nops, at most ASM_NOP_MAX bytes per
		 * nop sequence. */
		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
			k = diff;
			if (k > ASM_NOP_MAX)
				k = ASM_NOP_MAX;
			memcpy(a->instr + i, noptable[k], k);
		}
	}
}
/* Set by the "noreplacement" boot option to skip instruction patching. */
static int no_replacement __initdata = 0;

/* Apply all compile-time-recorded instruction alternatives, unless
 * disabled on the command line. */
void __init alternative_instructions(void)
{
	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
	if (no_replacement)
		return;
	apply_alternatives(__alt_instructions, __alt_instructions_end);
}
1495 static int __init noreplacement_setup(char *s)
1497 no_replacement = 1;
1498 return 0;
1501 __setup("noreplacement", noreplacement_setup);
static char * __init machine_specific_memory_setup(void);

#ifdef CONFIG_MCA
/* Record whether an MCA bus was detected (from the BIOS system
 * descriptor table). */
static void set_mca_bus(int x)
{
	MCA_bus = x;
}
#else
/* No MCA support configured: nothing to record. */
static void set_mca_bus(int x) { }
#endif
/*
 * Determine if we were loaded by an EFI loader. If so, then we have also been
 * passed the efi memmap, systab, etc., so we should use these data structures
 * for initialization. Note, the efi init code path is determined by the
 * global efi_enabled. This allows the same kernel image to be used on existing
 * systems (with a traditional BIOS) as well as on EFI systems.
 *
 * XEN: main architecture setup entry point.  Registers the panic
 * notifier, enables Xen VM assists, sizes memory, brings up paging and
 * the zone allocator, (re)builds the phys-to-machine table, and wires
 * up the console for dom0 vs. unprivileged domains.
 */
void __init setup_arch(char **cmdline_p)
{
	int i, j, k, fpp;
	physdev_op_t op;
	unsigned long max_low_pfn;

	/* Force a quick death if the kernel panics. */
	extern int panic_timeout;
	if (panic_timeout == 0)
		panic_timeout = 1;

	/* Register a call for panic conditions. */
	notifier_chain_register(&panic_notifier_list, &xen_panic_block);

	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
	HYPERVISOR_vm_assist(VMASST_CMD_enable,
			     VMASST_TYPE_writable_pagetables);

	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
	early_cpu_init();

	/*
	 * FIXME: This isn't an official loader_type right
	 * now but does currently work with elilo.
	 * If we were configured as an EFI kernel, check to make
	 * sure that we were loaded correctly from elilo and that
	 * the system table is valid.  If not, then initialize normally.
	 */
#ifdef CONFIG_EFI
	if ((LOADER_TYPE == 0x50) && EFI_SYSTAB)
		efi_enabled = 1;
#endif

	/* This must be initialized to UNNAMED_MAJOR for ipconfig to work
	   properly.  Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
	*/
	ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
	drive_info = DRIVE_INFO;
	screen_info = SCREEN_INFO;
	edid_info = EDID_INFO;
	apm_info.bios = APM_BIOS_INFO;
	ist_info = IST_INFO;
	saved_videomode = VIDEO_MODE;
	if( SYS_DESC_TABLE.length != 0 ) {
		set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2);
		machine_id = SYS_DESC_TABLE.table[0];
		machine_submodel_id = SYS_DESC_TABLE.table[1];
		BIOS_revision = SYS_DESC_TABLE.table[2];
	}
	bootloader_type = LOADER_TYPE;

#ifdef CONFIG_XEN_PHYSDEV_ACCESS
	/* This is drawn from a dump from vgacon:startup in standard Linux. */
	screen_info.orig_video_mode = 3;
	screen_info.orig_video_isVGA = 1;
	screen_info.orig_video_lines = 25;
	screen_info.orig_video_cols = 80;
	screen_info.orig_video_ega_bx = 3;
	screen_info.orig_video_points = 16;
#endif

#ifdef CONFIG_BLK_DEV_RAM
	rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
	rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
	rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
	ARCH_SETUP
	if (efi_enabled)
		efi_init();
	else {
		printk(KERN_INFO "BIOS-provided physical RAM map:\n");
		print_memory_map(machine_specific_memory_setup());
	}

	copy_edd();

	if (!MOUNT_ROOT_RDONLY)
		root_mountflags &= ~MS_RDONLY;
	init_mm.start_code = (unsigned long) _text;
	init_mm.end_code = (unsigned long) _etext;
	init_mm.end_data = (unsigned long) _edata;
	/* The heap starts right above the domain builder's page tables. */
	init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
		       xen_start_info->nr_pt_frames) << PAGE_SHIFT;

	/* XEN: This is nonsense: kernel may not even be contiguous in RAM. */
	/*code_resource.start = virt_to_phys(_text);*/
	/*code_resource.end = virt_to_phys(_etext)-1;*/
	/*data_resource.start = virt_to_phys(_etext);*/
	/*data_resource.end = virt_to_phys(_edata)-1;*/

	parse_cmdline_early(cmdline_p);

	max_low_pfn = setup_memory();

	/*
	 * NOTE: before this point _nobody_ is allowed to allocate
	 * any memory using the bootmem allocator.  Although the
	 * alloctor is now initialised only the first 8Mb of the kernel
	 * virtual address space has been mapped.  All allocations before
	 * paging_init() has completed must use the alloc_bootmem_low_pages()
	 * variant (which allocates DMA'able memory) and care must be taken
	 * not to exceed the 8Mb limit.
	 */

#ifdef CONFIG_SMP
	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
	paging_init();
	remapped_pgdat_init();
	zone_sizes_init();

#ifdef CONFIG_X86_FIND_SMP_CONFIG
	/*
	 * Find and reserve possible boot-time SMP configuration:
	 */
	find_smp_config();
#endif

	/* Make sure we have a correctly sized P->M table.  Xen supplied a
	 * table covering nr_pages frames; if max_pfn differs (e.g. due to
	 * a mem= override) allocate a fresh one of the right size. */
	if (max_pfn != xen_start_info->nr_pages) {
		phys_to_machine_mapping = alloc_bootmem_low_pages(
			max_pfn * sizeof(unsigned long));

		if (max_pfn > xen_start_info->nr_pages) {
			/* set to INVALID_P2M_ENTRY */
			memset(phys_to_machine_mapping, ~0,
				max_pfn * sizeof(unsigned long));
			memcpy(phys_to_machine_mapping,
				(unsigned long *)xen_start_info->mfn_list,
				xen_start_info->nr_pages * sizeof(unsigned long));
		} else {
			/* Shrinking: return the surplus machine frames
			 * beyond max_pfn to the hypervisor. */
			struct xen_memory_reservation reservation = {
				.extent_start = (unsigned long *)xen_start_info->mfn_list + max_pfn,
				.nr_extents   = xen_start_info->nr_pages - max_pfn,
				.extent_order = 0,
				.domid        = DOMID_SELF
			};

			memcpy(phys_to_machine_mapping,
				(unsigned long *)xen_start_info->mfn_list,
				max_pfn * sizeof(unsigned long));
			BUG_ON(HYPERVISOR_memory_op(
				XENMEM_decrease_reservation,
				&reservation) !=
			    (xen_start_info->nr_pages - max_pfn));
		}
		free_bootmem(
			__pa(xen_start_info->mfn_list),
			PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
					sizeof(unsigned long))));
	}

	/*
	 * Initialise the list of the frames that specify the list of
	 * frames that make up the p2m table. Used by save/restore
	 */
	pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
		virt_to_mfn(pfn_to_mfn_frame_list_list);

	/* fpp = frames (i.e. mfn entries) per page. */
	fpp = PAGE_SIZE/sizeof(unsigned long);
	for ( i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++ )
	{
		if ( (j % fpp) == 0 )
		{
			k++;
			BUG_ON(k>=16);
			pfn_to_mfn_frame_list[k] = alloc_bootmem_low_pages(PAGE_SIZE);
			pfn_to_mfn_frame_list_list[k] =
				virt_to_mfn(pfn_to_mfn_frame_list[k]);
			j=0;
		}
		pfn_to_mfn_frame_list[k][j] =
			virt_to_mfn(&phys_to_machine_mapping[i]);
	}
	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;

	/*
	 * NOTE: at this point the bootmem allocator is fully available.
	 */

#ifdef CONFIG_EARLY_PRINTK
	{
		char *s = strstr(*cmdline_p, "earlyprintk=");
		if (s) {
			extern void setup_early_printk(char *);

			setup_early_printk(s);
			printk("early console enabled\n");
		}
	}
#endif

	if (xen_start_info->flags & SIF_INITDOMAIN)
		dmi_scan_machine();

#ifdef CONFIG_X86_GENERICARCH
	generic_apic_probe(*cmdline_p);
#endif
	if (efi_enabled)
		efi_map_memmap();

	/* Raise our I/O privilege level so port access works. */
	op.cmd             = PHYSDEVOP_SET_IOPL;
	op.u.set_iopl.iopl = 1;
	HYPERVISOR_physdev_op(&op);

#ifdef CONFIG_ACPI_BOOT
	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
		acpi_disabled = 1;
		acpi_ht = 0;
	}
#endif

#ifdef CONFIG_ACPI_BOOT
	/*
	 * Parse the ACPI tables for possible boot-time SMP configuration.
	 */
	acpi_boot_table_init();
	acpi_boot_init();
#endif

#ifdef CONFIG_X86_LOCAL_APIC
	if (smp_found_config)
		get_smp_config();
#endif

	/* XXX Disable irqdebug until we have a way to avoid interrupt
	 * conflicts. */
	noirqdebug_setup("");

	register_memory();

	if (xen_start_info->flags & SIF_INITDOMAIN) {
		if (!(xen_start_info->flags & SIF_PRIVILEGED))
			panic("Xen granted us console access "
			      "but not privileged status");

#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
		if (!efi_enabled ||
		    (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
			conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
		conswitchp = &dummy_con;
#endif
#endif
	} else {
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
		extern const struct consw xennull_con;
		extern int console_use_vt;
#if defined(CONFIG_VGA_CONSOLE)
		/* disable VGA driver */
		ORIG_VIDEO_ISVGA = VIDEO_TYPE_VLFB;
#endif
		conswitchp = &xennull_con;
		console_use_vt = 0;
#endif
	}
}
/*
 * Panic notifier callback (registered in setup_arch): hand the domain
 * over to Xen by crashing it, so the toolstack can act on the failure.
 */
static int
xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	HYPERVISOR_crash();
	/* we're never actually going to get here... */
	return NOTIFY_DONE;
}
1791 #include "setup_arch_post.h"
1792 /*
1793 * Local Variables:
1794 * mode:c
1795 * c-file-style:"k&r"
1796 * c-basic-offset:8
1797 * End:
1798 */