ia64/xen-unstable

view xen/arch/x86/e820.c @ 19835:edfdeb150f27

Fix buildsystem to detect udev > version 124

udev removed the udevinfo symlink in versions newer than 123, so Xen's
build system could not detect whether udev was present and met the
required version.

Signed-off-by: Marc-A. Dahlhaus <mad@wol.de>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 25 13:02:37 2009 +0100 (2009-06-25)
parents 42fe00c6f8b4
children
line source
1 #include <xen/config.h>
2 #include <xen/init.h>
3 #include <xen/lib.h>
4 #include <xen/mm.h>
5 #include <xen/compat.h>
6 #include <xen/dmi.h>
7 #include <asm/e820.h>
8 #include <asm/page.h>
9 #include <asm/processor.h>
10 #include <asm/mtrr.h>
11 #include <asm/msr.h>
/* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
unsigned long long opt_mem;
/* Parse the "mem=<size>" boot option into opt_mem (a byte count). */
static void parse_mem(char *s) { opt_mem = parse_size_and_unit(s, NULL); }
custom_param("mem", parse_mem);
/* opt_nomtrr_check: Don't clip ram to highest cacheable MTRR. */
/* -1 (the default) means "auto-detect": mtrr_top_of_ram() then enables the
 * check on GenuineIntel CPUs only. */
static int __initdata e820_mtrr_clip = -1;
boolean_param("e820-mtrr-clip", e820_mtrr_clip);

/* opt_e820_verbose: Be verbose about clipping, the original e820, &c */
static int __initdata e820_verbose;
boolean_param("e820-verbose", e820_verbose);
/* The sanitized memory map used by the rest of Xen (filled by init_e820()). */
struct e820map e820;
28 /*
29 * This function checks if the entire range <start,end> is mapped with type.
30 *
31 * Note: this function only works correct if the e820 table is sorted and
32 * not-overlapping, which is the case
33 */
34 int __init e820_all_mapped(u64 start, u64 end, unsigned type)
35 {
36 int i;
38 for (i = 0; i < e820.nr_map; i++) {
39 struct e820entry *ei = &e820.map[i];
41 if (type && ei->type != type)
42 continue;
43 /* is the region (part) in overlap with the current region ?*/
44 if (ei->addr >= end || ei->addr + ei->size <= start)
45 continue;
47 /* if the region is at the beginning of <start,end> we move
48 * start to the end of the region since it's ok until there
49 */
50 if (ei->addr <= start)
51 start = ei->addr + ei->size;
52 /*
53 * if start is now at or beyond end, we're done, full
54 * coverage
55 */
56 if (start >= end)
57 return 1;
58 }
59 return 0;
60 }
62 static void __init add_memory_region(unsigned long long start,
63 unsigned long long size, int type)
64 {
65 int x;
67 /*if (!efi_enabled)*/ {
68 x = e820.nr_map;
70 if (x == E820MAX) {
71 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
72 return;
73 }
75 e820.map[x].addr = start;
76 e820.map[x].size = size;
77 e820.map[x].type = type;
78 e820.nr_map++;
79 }
80 } /* add_memory_region */
82 static void __init print_e820_memory_map(struct e820entry *map, int entries)
83 {
84 int i;
86 for (i = 0; i < entries; i++) {
87 printk(" %016Lx - %016Lx ",
88 (unsigned long long)(map[i].addr),
89 (unsigned long long)(map[i].addr + map[i].size));
90 switch (map[i].type) {
91 case E820_RAM:
92 printk("(usable)\n");
93 break;
94 case E820_RESERVED:
95 printk("(reserved)\n");
96 break;
97 case E820_ACPI:
98 printk("(ACPI data)\n");
99 break;
100 case E820_NVS:
101 printk("(ACPI NVS)\n");
102 break;
103 case E820_UNUSABLE:
104 printk("(unusable)\n");
105 break;
106 default:
107 printk("type %u\n", map[i].type);
108 break;
109 }
110 }
111 }
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries. The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
/* One change-point: the start or end address of an original BIOS entry. */
struct change_member {
    struct e820entry *pbios; /* pointer to original bios entry */
    unsigned long long addr; /* address for this change point */
};
/*
 * Scratch storage for sanitize_e820_map(); __initdata as it is only used
 * at boot. Each BIOS entry contributes two change-points (start + end),
 * hence the 2*E820MAX sizing.
 */
static struct change_member change_point_list[2*E820MAX] __initdata;
static struct change_member *change_point[2*E820MAX] __initdata;
static struct e820entry *overlap_list[E820MAX] __initdata;
static struct e820entry new_bios[E820MAX] __initdata;
/*
 * Sort the BIOS e820 map and remove overlaps, rewriting it in place.
 *
 * Works by sweeping a sorted list of "change-points" (every entry's start
 * and end address). At each point the set of currently-overlapping source
 * entries is tracked, and the new map takes the largest type value among
 * them (1 = usable RAM; larger values are reserved/unusable and so
 * dominate).
 *
 * @biosmap: raw BIOS entries; overwritten with the sanitized map.
 * @pnr_map: in/out entry count. NOTE(review): this is a 'char', so the
 *           caller's count must fit in it -- see the (char) cast in
 *           machine_specific_memory_setup().
 *
 * Returns 0 on success, -1 if the map is trivial (<2 entries) or any
 * entry's addr+size wraps around 64 bits.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
    struct change_member *change_tmp;
    unsigned long current_type, last_type;
    unsigned long long last_addr;
    int chgidx, still_changing;
    int overlap_entries;
    int new_bios_entry;
    int old_nr, new_nr, chg_nr;
    int i;

    /*
       Visually we're performing the following (1,2,3,4 = memory types)...

       Sample memory map (w/overlaps):
          ____22__________________
          ______________________4_
          ____1111________________
          _44_____________________
          11111111________________
          ____________________33__
          ___________44___________
          __________33333_________
          ______________22________
          ___________________2222_
          _________111111111______
          _____________________11_
          _________________4______

       Sanitized equivalent (no overlap):
          1_______________________
          _44_____________________
          ___1____________________
          ____22__________________
          ______11________________
          _________1______________
          __________3_____________
          ___________44___________
          _____________33_________
          _______________2________
          ________________1_______
          _________________4______
          ___________________2____
          ____________________33__
          ______________________4_
    */

    /* if there's only one memory region, don't bother */
    if (*pnr_map < 2)
        return -1;

    old_nr = *pnr_map;

    /* bail out if we find any unreasonable addresses in bios map */
    for (i=0; i<old_nr; i++)
        if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
            return -1;

    /* create pointers for initial change-point information (for sorting) */
    for (i=0; i < 2*old_nr; i++)
        change_point[i] = &change_point_list[i];

    /* record all known change-points (starting and ending addresses),
       omitting those that are for empty memory regions */
    chgidx = 0;
    for (i=0; i < old_nr; i++) {
        if (biosmap[i].size != 0) {
            change_point[chgidx]->addr = biosmap[i].addr;
            change_point[chgidx++]->pbios = &biosmap[i];
            change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
            change_point[chgidx++]->pbios = &biosmap[i];
        }
    }
    chg_nr = chgidx;    /* true number of change-points */

    /* sort change-point list by memory addresses (low -> high) */
    /* Simple bubble sort: the list is at most 2*E820MAX entries, and the
       tie-break puts a start-point before an end-point at equal addresses
       so abutting regions are handled correctly. */
    still_changing = 1;
    while (still_changing) {
        still_changing = 0;
        for (i=1; i < chg_nr; i++) {
            /* if <current_addr> > <last_addr>, swap */
            /* or, if current=<start_addr> & last=<end_addr>, swap */
            if ((change_point[i]->addr < change_point[i-1]->addr) ||
                ((change_point[i]->addr == change_point[i-1]->addr) &&
                 (change_point[i]->addr == change_point[i]->pbios->addr) &&
                 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
                )
            {
                change_tmp = change_point[i];
                change_point[i] = change_point[i-1];
                change_point[i-1] = change_tmp;
                still_changing=1;
            }
        }
    }

    /* create a new bios memory map, removing overlaps */
    overlap_entries=0;   /* number of entries in the overlap table */
    new_bios_entry=0;    /* index for creating new bios map entries */
    last_type = 0;       /* start with undefined memory type */
    last_addr = 0;       /* start with 0 as last starting address */
    /* loop through change-points, determining affect on the new bios map */
    for (chgidx=0; chgidx < chg_nr; chgidx++)
    {
        /* keep track of all overlapping bios entries */
        if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
        {
            /* add map entry to overlap list (> 1 entry implies an overlap) */
            overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
        }
        else
        {
            /* remove entry from list (order independent, so swap with last) */
            for (i=0; i<overlap_entries; i++)
            {
                if (overlap_list[i] == change_point[chgidx]->pbios)
                    overlap_list[i] = overlap_list[overlap_entries-1];
            }
            overlap_entries--;
        }
        /* if there are overlapping entries, decide which "type" to use */
        /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
        current_type = 0;
        for (i=0; i<overlap_entries; i++)
            if (overlap_list[i]->type > current_type)
                current_type = overlap_list[i]->type;
        /* continue building up new bios map based on this information */
        if (current_type != last_type) {
            if (last_type != 0) {
                /* Close off the previous region at this change-point. */
                new_bios[new_bios_entry].size =
                    change_point[chgidx]->addr - last_addr;
                /* move forward only if the new size was non-zero */
                if (new_bios[new_bios_entry].size != 0)
                    if (++new_bios_entry >= E820MAX)
                        break; /* no more space left for new bios entries */
            }
            if (current_type != 0) {
                /* Open a new region of the winning type. */
                new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
                new_bios[new_bios_entry].type = current_type;
                last_addr=change_point[chgidx]->addr;
            }
            last_type = current_type;
        }
    }
    new_nr = new_bios_entry;   /* retain count for new bios entries */

    /* copy new bios mapping into original location */
    memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
    *pnr_map = new_nr;

    return 0;
}
/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory. If we aren't, we'll fake a memory map.
 *
 * We check to see that the memory map contains at least 2 elements
 * before we'll use it, because the detection code in setup.S may
 * not be perfect and most every PC known to man has two memory
 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
 * thinkpad 560x, for example, does not cooperate with the memory
 * detection code.)
 */
/* Returns 0 on success, -1 if the map is rejected (entries added so far
 * via add_memory_region() are nevertheless left in the global e820 map). */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
    /* Only one memory region (or negative)? Ignore it */
    if (nr_map < 2)
        return -1;

    do {
        unsigned long long start = biosmap->addr;
        unsigned long long size = biosmap->size;
        unsigned long long end = start + size;
        unsigned long type = biosmap->type;

        /* Overflow in 64 bits? Ignore the memory map. */
        if (start > end)
            return -1;

        /*
         * Some BIOSes claim RAM in the 640k - 1M region.
         * Not right. Fix it up.
         */
        if (type == E820_RAM) {
            if (start < 0x100000ULL && end > 0xA0000ULL) {
                if (start < 0xA0000ULL)
                    add_memory_region(start, 0xA0000ULL-start, type);
                /* Note: 'continue' in a do/while still evaluates the
                 * condition below, so the (biosmap++,--nr_map) comma
                 * expression advances to the next entry as intended. */
                if (end <= 0x100000ULL)
                    continue;
                start = 0x100000ULL;
                size = end - start;
            }
        }
        add_memory_region(start, size, type);
    } while (biosmap++,--nr_map);
    return 0;
}
334 /*
335 * Find the highest page frame number we have available
336 */
337 static unsigned long __init find_max_pfn(void)
338 {
339 int i;
340 unsigned long max_pfn = 0;
342 #if 0
343 if (efi_enabled) {
344 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
345 return;
346 }
347 #endif
349 for (i = 0; i < e820.nr_map; i++) {
350 unsigned long start, end;
351 /* RAM? */
352 if (e820.map[i].type != E820_RAM)
353 continue;
354 start = PFN_UP(e820.map[i].addr);
355 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
356 if (start >= end)
357 continue;
358 if (end > max_pfn)
359 max_pfn = end;
360 }
362 return max_pfn;
363 }
365 static void __init clip_to_limit(uint64_t limit, char *warnmsg)
366 {
367 int i;
368 char _warnmsg[160];
369 uint64_t old_limit = 0;
371 for ( i = 0; i < e820.nr_map; i++ )
372 {
373 if ( (e820.map[i].type != E820_RAM) ||
374 ((e820.map[i].addr + e820.map[i].size) <= limit) )
375 continue;
376 old_limit = e820.map[i].addr + e820.map[i].size;
377 if ( e820.map[i].addr < limit )
378 {
379 e820.map[i].size = limit - e820.map[i].addr;
380 }
381 else
382 {
383 memmove(&e820.map[i], &e820.map[i+1],
384 (e820.nr_map - i - 1) * sizeof(struct e820entry));
385 e820.nr_map--;
386 }
387 }
389 if ( old_limit )
390 {
391 if ( warnmsg )
392 {
393 snprintf(_warnmsg, sizeof(_warnmsg), warnmsg, (long)(limit>>30));
394 printk("WARNING: %s\n", _warnmsg);
395 }
396 printk("Truncating RAM from %lukB to %lukB\n",
397 (unsigned long)(old_limit >> 10), (unsigned long)(limit >> 10));
398 }
399 }
/* Conservative estimate of top-of-RAM by looking for MTRR WB regions. */
/* MSR numbers of the variable-range MTRR base/mask pair for register 'reg'. */
#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
/*
 * Scan the variable-range MTRRs and return the end address of the highest
 * write-back (WB) range found, or 0 if no clipping should be done (check
 * disabled, non-Intel CPU with auto setting, no MTRR support, MTRRs
 * disabled, or default type already WB).
 */
static uint64_t mtrr_top_of_ram(void)
{
    uint32_t eax, ebx, ecx, edx;
    uint64_t mtrr_cap, mtrr_def, addr_mask, base, mask, top;
    unsigned int i, phys_bits = 36; /* fallback physical address width */

    /* By default we check only Intel systems. */
    if ( e820_mtrr_clip == -1 )
    {
        /* CPUID leaf 0 returns the vendor string in ebx:edx:ecx order. */
        char vendor[13];
        cpuid(0x00000000, &eax,
              (uint32_t *)&vendor[0],
              (uint32_t *)&vendor[8],
              (uint32_t *)&vendor[4]);
        vendor[12] = '\0';
        e820_mtrr_clip = !strcmp(vendor, "GenuineIntel");
    }

    if ( !e820_mtrr_clip )
        return 0;

    if ( e820_verbose )
        printk("Checking MTRR ranges...\n");

    /* Does the CPU support architectural MTRRs? */
    cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
    if ( !test_bit(X86_FEATURE_MTRR & 31, &edx) )
        return 0;

    /* Find the physical address size for this CPU. */
    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    if ( eax >= 0x80000008 )
    {
        cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
        phys_bits = (uint8_t)eax;
    }
    /* Mask covering the physical-address bits of a page-aligned MTRR value. */
    addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);

    rdmsrl(MSR_MTRRcap, mtrr_cap);
    rdmsrl(MSR_MTRRdefType, mtrr_def);

    if ( e820_verbose )
        printk(" MTRR cap: %"PRIx64" type: %"PRIx64"\n", mtrr_cap, mtrr_def);

    /* MTRRs enabled, and default memory type is not writeback? */
    /* (If the default type is WB there is nothing useful to clip against.) */
    if ( !test_bit(11, &mtrr_def) || ((uint8_t)mtrr_def == MTRR_TYPE_WRBACK) )
        return 0;

    /*
     * Find end of highest WB-type range. This is a conservative estimate
     * of the highest WB address since overlapping UC/WT ranges dominate.
     */
    top = 0;
    /* Low byte of MTRRcap is the variable-range register count. */
    for ( i = 0; i < (uint8_t)mtrr_cap; i++ )
    {
        rdmsrl(MSR_MTRRphysBase(i), base);
        rdmsrl(MSR_MTRRphysMask(i), mask);

        if ( e820_verbose )
            printk(" MTRR[%d]: base %"PRIx64" mask %"PRIx64"\n",
                   i, base, mask);

        /* Skip ranges that are invalid (mask bit 11 clear) or not WB. */
        if ( !test_bit(11, &mask) || ((uint8_t)base != MTRR_TYPE_WRBACK) )
            continue;
        base &= addr_mask;
        mask &= addr_mask;
        /* (base | ~mask) is the last byte of the range; add a page for end. */
        top = max_t(uint64_t, top, ((base | ~mask) & addr_mask) + PAGE_SIZE);
    }

    return top;
}
476 static void __init reserve_dmi_region(void)
477 {
478 u32 base, len;
479 if ( (dmi_get_table(&base, &len) == 0) && ((base + len) > base) &&
480 reserve_e820_ram(&e820, base, base + len) )
481 printk("WARNING: DMI table located in E820 RAM %08x-%08x. Fixed.\n",
482 base, base+len);
483 }
/*
 * Build the global e820 map from the raw boot-loader map: sanitize it,
 * copy it into 'e820', then clip it to the "mem=" option, architecture
 * addressability limits, the DMI table, and (optionally) the MTRR WB
 * top-of-RAM estimate.
 */
static void __init machine_specific_memory_setup(
    struct e820entry *raw, int *raw_nr)
{
    uint64_t top_of_ram;

    /* sanitize_e820_map() takes a char count.
     * NOTE(review): a *raw_nr above 127 would be truncated here -- the
     * raw map is presumably always far smaller; confirm against callers. */
    char nr = (char)*raw_nr;
    sanitize_e820_map(raw, &nr);
    *raw_nr = nr;
    (void)copy_e820_map(raw, nr);

    /* Honour the "mem=" command-line limit, if any. */
    if ( opt_mem )
        clip_to_limit(opt_mem, NULL);

#ifdef __i386__
    /* 32-bit Xen can only map the first MACHPHYS_MBYTES of RAM. */
    clip_to_limit((1ULL << 30) * MACHPHYS_MBYTES,
                  "Only the first %lu GB of the physical memory map "
                  "can be accessed by Xen in 32-bit mode.");
#else
    {
        /*
         * 64-bit: RAM must fit in the direct map, the machine-to-phys
         * table, and the frame table; clip to the smallest of the three.
         */
        unsigned long limit, mpt_limit, pft_limit;

        limit = DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START;
        mpt_limit = ((RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START)
                     / sizeof(unsigned long)) << PAGE_SHIFT;
        pft_limit = ((FRAMETABLE_VIRT_END - FRAMETABLE_VIRT_START)
                     / sizeof(struct page_info)) << PAGE_SHIFT;
        if ( limit > mpt_limit )
            limit = mpt_limit;
        if ( limit > pft_limit )
            limit = pft_limit;
        clip_to_limit(limit,
                      "Only the first %lu GB of the physical "
                      "memory map can be accessed by Xen.");
    }
#endif

    reserve_dmi_region();

    /* Optionally distrust RAM not covered by a write-back MTRR. */
    top_of_ram = mtrr_top_of_ram();
    if ( top_of_ram )
        clip_to_limit(top_of_ram, "MTRRs do not cover all of memory.");
}
528 int __init e820_change_range_type(
529 struct e820map *e820, uint64_t s, uint64_t e,
530 uint32_t orig_type, uint32_t new_type)
531 {
532 uint64_t rs = 0, re = 0;
533 int i;
535 for ( i = 0; i < e820->nr_map; i++ )
536 {
537 /* Have we found the e820 region that includes the specified range? */
538 rs = e820->map[i].addr;
539 re = rs + e820->map[i].size;
540 if ( (s >= rs) && (e <= re) )
541 break;
542 }
544 if ( (i == e820->nr_map) || (e820->map[i].type != orig_type) )
545 return 0;
547 if ( (s == rs) && (e == re) )
548 {
549 e820->map[i].type = new_type;
550 }
551 else if ( (s == rs) || (e == re) )
552 {
553 if ( (e820->nr_map + 1) > ARRAY_SIZE(e820->map) )
554 goto overflow;
556 memmove(&e820->map[i+1], &e820->map[i],
557 (e820->nr_map-i) * sizeof(e820->map[0]));
558 e820->nr_map++;
560 if ( s == rs )
561 {
562 e820->map[i].size = e - s;
563 e820->map[i].type = new_type;
564 e820->map[i+1].addr = e;
565 e820->map[i+1].size = re - e;
566 }
567 else
568 {
569 e820->map[i].size = s - rs;
570 e820->map[i+1].addr = s;
571 e820->map[i+1].size = e - s;
572 e820->map[i+1].type = new_type;
573 }
574 }
575 else if ( e820->nr_map+1 < ARRAY_SIZE(e820->map) )
576 {
577 if ( (e820->nr_map + 2) > ARRAY_SIZE(e820->map) )
578 goto overflow;
580 memmove(&e820->map[i+2], &e820->map[i],
581 (e820->nr_map-i) * sizeof(e820->map[0]));
582 e820->nr_map += 2;
584 e820->map[i].size = s - rs;
585 e820->map[i+1].addr = s;
586 e820->map[i+1].size = e - s;
587 e820->map[i+1].type = new_type;
588 e820->map[i+2].addr = e;
589 e820->map[i+2].size = re - e;
590 }
592 /* Finally, look for any opportunities to merge adjacent e820 entries. */
593 for ( i = 0; i < (e820->nr_map - 1); i++ )
594 {
595 if ( (e820->map[i].type != e820->map[i+1].type) ||
596 ((e820->map[i].addr + e820->map[i].size) != e820->map[i+1].addr) )
597 continue;
598 e820->map[i].size += e820->map[i+1].size;
599 memmove(&e820->map[i+1], &e820->map[i+2],
600 (e820->nr_map-i-2) * sizeof(e820->map[0]));
601 e820->nr_map--;
602 i--;
603 }
605 return 1;
607 overflow:
608 printk("Overflow in e820 while reserving region %"PRIx64"-%"PRIx64"\n",
609 s, e);
610 return 0;
611 }
/* Set E820_RAM area (@s,@e) as RESERVED in specified e820 map. */
/* Thin wrapper around e820_change_range_type(); returns its 1/0 result. */
int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e)
{
    return e820_change_range_type(e820, s, e, E820_RAM, E820_RESERVED);
}
619 unsigned long __init init_e820(
620 const char *str, struct e820entry *raw, int *raw_nr)
621 {
622 if ( e820_verbose )
623 {
624 printk("Initial %s RAM map:\n", str);
625 print_e820_memory_map(raw, *raw_nr);
626 }
628 machine_specific_memory_setup(raw, raw_nr);
630 printk("%s RAM map:\n", str);
631 print_e820_memory_map(e820.map, e820.nr_map);
633 return find_max_pfn();
634 }