direct-io.hg

view xen/arch/x86/e820.c @ 15521:1f348e70a5af

Re-factor e820 truncation code and reintroduce clipping for 32-on-64
guests (to 166GB).
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Tue Jul 10 11:10:38 2007 +0100 (2007-07-10)
parents ecb89c6ce615
children 13483c74ce04
line source
1 #include <xen/config.h>
2 #include <xen/init.h>
3 #include <xen/lib.h>
4 #include <xen/compat.h>
5 #include <asm/e820.h>
6 #include <asm/page.h>
8 /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
9 unsigned long long opt_mem;
10 static void parse_mem(char *s) { opt_mem = parse_size_and_unit(s, NULL); }
11 custom_param("mem", parse_mem);
13 struct e820map e820;
15 static void __init add_memory_region(unsigned long long start,
16 unsigned long long size, int type)
17 {
18 int x;
20 /*if (!efi_enabled)*/ {
21 x = e820.nr_map;
23 if (x == E820MAX) {
24 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
25 return;
26 }
28 e820.map[x].addr = start;
29 e820.map[x].size = size;
30 e820.map[x].type = type;
31 e820.nr_map++;
32 }
33 } /* add_memory_region */
35 static void __init print_e820_memory_map(struct e820entry *map, int entries)
36 {
37 int i;
39 for (i = 0; i < entries; i++) {
40 printk(" %016Lx - %016Lx ",
41 (unsigned long long)(map[i].addr),
42 (unsigned long long)(map[i].addr + map[i].size));
43 switch (map[i].type) {
44 case E820_RAM: printk("(usable)\n");
45 break;
46 case E820_RESERVED:
47 printk("(reserved)\n");
48 break;
49 case E820_ACPI:
50 printk("(ACPI data)\n");
51 break;
52 case E820_NVS:
53 printk("(ACPI NVS)\n");
54 break;
55 default: printk("type %u\n", map[i].type);
56 break;
57 }
58 }
59 }
61 /*
62 * Sanitize the BIOS e820 map.
63 *
64 * Some e820 responses include overlapping entries. The following
65 * replaces the original e820 map with a new one, removing overlaps.
66 *
67 */
68 struct change_member {
69 struct e820entry *pbios; /* pointer to original bios entry */
70 unsigned long long addr; /* address for this change point */
71 };
72 static struct change_member change_point_list[2*E820MAX] __initdata;
73 static struct change_member *change_point[2*E820MAX] __initdata;
74 static struct e820entry *overlap_list[E820MAX] __initdata;
75 static struct e820entry new_bios[E820MAX] __initdata;
77 static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
78 {
79 struct change_member *change_tmp;
80 unsigned long current_type, last_type;
81 unsigned long long last_addr;
82 int chgidx, still_changing;
83 int overlap_entries;
84 int new_bios_entry;
85 int old_nr, new_nr, chg_nr;
86 int i;
88 /*
89 Visually we're performing the following (1,2,3,4 = memory types)...
91 Sample memory map (w/overlaps):
92 ____22__________________
93 ______________________4_
94 ____1111________________
95 _44_____________________
96 11111111________________
97 ____________________33__
98 ___________44___________
99 __________33333_________
100 ______________22________
101 ___________________2222_
102 _________111111111______
103 _____________________11_
104 _________________4______
106 Sanitized equivalent (no overlap):
107 1_______________________
108 _44_____________________
109 ___1____________________
110 ____22__________________
111 ______11________________
112 _________1______________
113 __________3_____________
114 ___________44___________
115 _____________33_________
116 _______________2________
117 ________________1_______
118 _________________4______
119 ___________________2____
120 ____________________33__
121 ______________________4_
122 */
124 /* if there's only one memory region, don't bother */
125 if (*pnr_map < 2)
126 return -1;
128 old_nr = *pnr_map;
130 /* bail out if we find any unreasonable addresses in bios map */
131 for (i=0; i<old_nr; i++)
132 if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
133 return -1;
135 /* create pointers for initial change-point information (for sorting) */
136 for (i=0; i < 2*old_nr; i++)
137 change_point[i] = &change_point_list[i];
139 /* record all known change-points (starting and ending addresses),
140 omitting those that are for empty memory regions */
141 chgidx = 0;
142 for (i=0; i < old_nr; i++) {
143 if (biosmap[i].size != 0) {
144 change_point[chgidx]->addr = biosmap[i].addr;
145 change_point[chgidx++]->pbios = &biosmap[i];
146 change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
147 change_point[chgidx++]->pbios = &biosmap[i];
148 }
149 }
150 chg_nr = chgidx; /* true number of change-points */
152 /* sort change-point list by memory addresses (low -> high) */
153 still_changing = 1;
154 while (still_changing) {
155 still_changing = 0;
156 for (i=1; i < chg_nr; i++) {
157 /* if <current_addr> > <last_addr>, swap */
158 /* or, if current=<start_addr> & last=<end_addr>, swap */
159 if ((change_point[i]->addr < change_point[i-1]->addr) ||
160 ((change_point[i]->addr == change_point[i-1]->addr) &&
161 (change_point[i]->addr == change_point[i]->pbios->addr) &&
162 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
163 )
164 {
165 change_tmp = change_point[i];
166 change_point[i] = change_point[i-1];
167 change_point[i-1] = change_tmp;
168 still_changing=1;
169 }
170 }
171 }
173 /* create a new bios memory map, removing overlaps */
174 overlap_entries=0; /* number of entries in the overlap table */
175 new_bios_entry=0; /* index for creating new bios map entries */
176 last_type = 0; /* start with undefined memory type */
177 last_addr = 0; /* start with 0 as last starting address */
178 /* loop through change-points, determining affect on the new bios map */
179 for (chgidx=0; chgidx < chg_nr; chgidx++)
180 {
181 /* keep track of all overlapping bios entries */
182 if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
183 {
184 /* add map entry to overlap list (> 1 entry implies an overlap) */
185 overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
186 }
187 else
188 {
189 /* remove entry from list (order independent, so swap with last) */
190 for (i=0; i<overlap_entries; i++)
191 {
192 if (overlap_list[i] == change_point[chgidx]->pbios)
193 overlap_list[i] = overlap_list[overlap_entries-1];
194 }
195 overlap_entries--;
196 }
197 /* if there are overlapping entries, decide which "type" to use */
198 /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
199 current_type = 0;
200 for (i=0; i<overlap_entries; i++)
201 if (overlap_list[i]->type > current_type)
202 current_type = overlap_list[i]->type;
203 /* continue building up new bios map based on this information */
204 if (current_type != last_type) {
205 if (last_type != 0) {
206 new_bios[new_bios_entry].size =
207 change_point[chgidx]->addr - last_addr;
208 /* move forward only if the new size was non-zero */
209 if (new_bios[new_bios_entry].size != 0)
210 if (++new_bios_entry >= E820MAX)
211 break; /* no more space left for new bios entries */
212 }
213 if (current_type != 0) {
214 new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
215 new_bios[new_bios_entry].type = current_type;
216 last_addr=change_point[chgidx]->addr;
217 }
218 last_type = current_type;
219 }
220 }
221 new_nr = new_bios_entry; /* retain count for new bios entries */
223 /* copy new bios mapping into original location */
224 memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
225 *pnr_map = new_nr;
227 return 0;
228 }
230 /*
231 * Copy the BIOS e820 map into a safe place.
232 *
233 * Sanity-check it while we're at it..
234 *
235 * If we're lucky and live on a modern system, the setup code
236 * will have given us a memory map that we can use to properly
237 * set up memory. If we aren't, we'll fake a memory map.
238 *
239 * We check to see that the memory map contains at least 2 elements
240 * before we'll use it, because the detection code in setup.S may
241 * not be perfect and most every PC known to man has two memory
242 * regions: one from 0 to 640k, and one from 1mb up. (The IBM
243 * thinkpad 560x, for example, does not cooperate with the memory
244 * detection code.)
245 */
246 static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
247 {
248 /* Only one memory region (or negative)? Ignore it */
249 if (nr_map < 2)
250 return -1;
252 do {
253 unsigned long long start = biosmap->addr;
254 unsigned long long size = biosmap->size;
255 unsigned long long end = start + size;
256 unsigned long type = biosmap->type;
258 /* Overflow in 64 bits? Ignore the memory map. */
259 if (start > end)
260 return -1;
262 /*
263 * Some BIOSes claim RAM in the 640k - 1M region.
264 * Not right. Fix it up.
265 */
266 if (type == E820_RAM) {
267 if (start < 0x100000ULL && end > 0xA0000ULL) {
268 if (start < 0xA0000ULL)
269 add_memory_region(start, 0xA0000ULL-start, type);
270 if (end <= 0x100000ULL)
271 continue;
272 start = 0x100000ULL;
273 size = end - start;
274 }
275 }
276 add_memory_region(start, size, type);
277 } while (biosmap++,--nr_map);
278 return 0;
279 }
282 /*
283 * Find the highest page frame number we have available
284 */
285 static unsigned long __init find_max_pfn(void)
286 {
287 int i;
288 unsigned long max_pfn = 0;
290 #if 0
291 if (efi_enabled) {
292 efi_memmap_walk(efi_find_max_pfn, &max_pfn);
293 return;
294 }
295 #endif
297 for (i = 0; i < e820.nr_map; i++) {
298 unsigned long start, end;
299 /* RAM? */
300 if (e820.map[i].type != E820_RAM)
301 continue;
302 start = PFN_UP(e820.map[i].addr);
303 end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
304 if (start >= end)
305 continue;
306 if (end > max_pfn)
307 max_pfn = end;
308 }
310 return max_pfn;
311 }
313 static void __init clip_to_limit(uint64_t limit, char *warnmsg)
314 {
315 int i;
316 char _warnmsg[160];
318 for ( i = 0; i < e820.nr_map; i++ )
319 {
320 if ( (e820.map[i].addr + e820.map[i].size) <= limit )
321 continue;
322 if ( warnmsg )
323 {
324 snprintf(_warnmsg, sizeof(_warnmsg), warnmsg, (int)(limit>>30));
325 printk("WARNING: %s\n", _warnmsg);
326 }
327 printk("Truncating memory map to %lukB\n",
328 (unsigned long)(limit >> 10));
329 if ( e820.map[i].addr >= limit )
330 {
331 e820.nr_map = i;
332 }
333 else
334 {
335 e820.map[i].size = limit - e820.map[i].addr;
336 e820.nr_map = i + 1;
337 }
338 }
339 }
341 static void __init machine_specific_memory_setup(
342 struct e820entry *raw, int *raw_nr)
343 {
344 char nr = (char)*raw_nr;
345 sanitize_e820_map(raw, &nr);
346 *raw_nr = nr;
347 (void)copy_e820_map(raw, nr);
349 if ( opt_mem )
350 clip_to_limit(opt_mem, NULL);
352 #ifdef __i386__
353 clip_to_limit((1ULL << 30) * MACHPHYS_MBYTES,
354 "Only the first %u GB of the physical memory map "
355 "can be accessed by Xen in 32-bit mode.");
356 #endif
358 #ifdef __x86_64__
359 clip_to_limit((uint64_t)(MACH2PHYS_COMPAT_VIRT_END -
360 __HYPERVISOR_COMPAT_VIRT_START) << 10,
361 "Only the first %u GB of the physical memory map "
362 "can be accessed by 32-on-64 guests.");
363 #endif
364 }
366 unsigned long __init init_e820(
367 const char *str, struct e820entry *raw, int *raw_nr)
368 {
369 machine_specific_memory_setup(raw, raw_nr);
370 printk("%s RAM map:\n", str);
371 print_e820_memory_map(e820.map, e820.nr_map);
372 return find_max_pfn();
373 }