
view extras/mini-os/mm.c @ 9788:bdcc838b9a72

Add small memory warning message to domain configuration examples.

Signed-off-by: Daniel Stekloff <dsteklof@us.ibm.com>
author stekloff@dyn9047022152.beaverton.ibm.com
date Wed Apr 19 22:58:24 2006 +0100 (2006-04-19)
parents 8286738ab7f9
children dc213d745642
/*
 ****************************************************************************
 * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: mm.c
 *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
 *     Changes: Grzegorz Milos
 *
 *        Date: Aug 2003, changes Aug 2005
 *
 * Environment: Xen Minimal OS
 * Description: memory management related functions
 *              contains buddy page allocator from Xen.
 *
 ****************************************************************************
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <os.h>
#include <hypervisor.h>
#include <mm.h>
#include <types.h>
#include <lib.h>
#include <xmalloc.h>

#ifdef MM_DEBUG
#define DEBUG(_f, _a...) \
    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
#else
#define DEBUG(_f, _a...)    ((void)0)
#endif

unsigned long *phys_to_machine_mapping;
extern char *stack;
extern char _text, _etext, _edata, _end;
extern void page_walk(unsigned long virt_addr);

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn) \
    (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n)  sets all bits >= n.
 *  (1<<n)-1 sets all bits <  n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
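
/*
 * Worked example (editorial addition, assuming a 32-bit map word):
 *   n == 4:  -(1<<4)  == 0xfffffff0   (bits 4..31 set)
 *            (1<<4)-1 == 0x0000000f   (bits 0..3  set)
 * so ((1<<end_off)-1) & -(1<<start_off) selects exactly the bit range
 * [start_off, end_off) within a single word.
 */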

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1<<end_off);
    }
}
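
/*
 * Usage sketch (editorial addition, values assume a 32-bit map word):
 *   map_alloc(4, 6);  // bitmap[0] |= ((1<<10)-1) & -(1<<4) == 0x000003f0
 *                     //   -> pages 4..9 marked allocated
 *   map_free(6, 2);   // bitmap[0] &= -(1<<8) | ((1<<6)-1)  == 0xffffff3f
 *                     //   -> pages 6..7 cleared again
 */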

/*************************
 * BINARY BUDDY ALLOCATOR
 */

typedef struct chunk_head_st chunk_head_t;
typedef struct chunk_tail_st chunk_tail_t;

struct chunk_head_st {
    chunk_head_t  *next;
    chunk_head_t **pprev;
    int            level;
};

struct chunk_tail_st {
    int level;
};
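
/*
 * Layout note (editorial addition): a free chunk of order n covers 2^n
 * pages; its chunk_head_t sits in the first bytes of the chunk and
 * carries the freelist linkage, while a chunk_tail_t recording the same
 * order is written into the chunk's last bytes. Allocated chunks carry
 * no metadata at all -- the allocation bitmap above is the sole record
 * of what is in use.
 */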

/* Linked lists of free chunks of different powers-of-two in size. */
#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
static chunk_head_t *free_head[FREELIST_SIZE];
static chunk_head_t  free_tail[FREELIST_SIZE];
#define FREELIST_EMPTY(_l) ((_l)->next == NULL)

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

#ifdef MM_DEBUG
/*
 * Prints allocation[0/1] for @nr_pages, starting at @start
 * address (virtual).
 */
static void print_allocation(void *start, int nr_pages)
{
    unsigned long pfn_start = virt_to_pfn(start);
    int count;

    for(count = 0; count < nr_pages; count++)
        if(allocated_in_map(pfn_start + count)) printk("1");
        else printk("0");

    printk("\n");
}

/*
 * Prints chunks (marking them with letters) for @nr_pages starting
 * at @start (virtual).
 */
static void print_chunks(void *start, int nr_pages)
{
    char chunks[1001], current='A';
    int order, count;
    chunk_head_t *head;
    unsigned long pfn_start = virt_to_pfn(start);

    memset(chunks, (int)'_', 1000);
    if(nr_pages > 1000)
    {
        DEBUG("Can only print 1000 pages. Increase buffer size.");
        nr_pages = 1000;    /* clamp so chunks[nr_pages] below stays in bounds */
    }

    for(order=0; order < FREELIST_SIZE; order++)
    {
        head = free_head[order];
        while(!FREELIST_EMPTY(head))
        {
            for(count = 0; count < 1<< head->level; count++)
            {
                if(count + virt_to_pfn(head) - pfn_start < 1000)
                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
            }
            head = head->next;
            current++;
        }
    }
    chunks[nr_pages] = '\0';
    printk("%s\n", chunks);
}
#endif


/*
 * Initialise allocator, placing addresses [@min,@max] in free pool.
 * @min and @max are PHYSICAL addresses.
 */
static void init_page_allocator(unsigned long min, unsigned long max)
{
    int i;
    unsigned long range, bitmap_size;
    chunk_head_t *ch;
    chunk_tail_t *ct;

    for ( i = 0; i < FREELIST_SIZE; i++ )
    {
        free_head[i]       = &free_tail[i];
        free_tail[i].pprev = &free_head[i];
        free_tail[i].next  = NULL;
    }

    min = round_pgup (min);
    max = round_pgdown(max);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)to_virt(min);
    min         += bitmap_size;
    range        = max - min;

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);
    /* Free up the memory we've been given to play with. */
    map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);

    /* The buddy lists are addressed in high memory. */
    min += VIRT_START;
    max += VIRT_START;

    while ( range != 0 )
    {
        /*
         * Next chunk is limited by alignment of min, but also
         * must not be bigger than remaining range.
         */
        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
            if ( min & (1<<i) ) break;

        ch = (chunk_head_t *)min;
        min   += (1<<i);
        range -= (1<<i);
        ct = (chunk_tail_t *)min-1;
        i -= PAGE_SHIFT;
        ch->level       = i;
        ch->next        = free_head[i];
        ch->pprev       = &free_head[i];
        ch->next->pprev = &ch->next;
        free_head[i]    = ch;
        ct->level       = i;
    }
}
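
/*
 * Worked example (editorial addition, PAGE_SHIFT == 12): with
 * min == 0x5000 and range == 0xb000 the carving loop emits a 4KB chunk
 * at 0x5000 (bit 12 of min set), an 8KB chunk at 0x6000 (bit 13 set),
 * then a 32KB chunk at 0x8000 that consumes the remainder. Each chunk
 * is the largest power of two permitted by both the alignment of min
 * and the bytes still remaining.
 */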

/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */
unsigned long alloc_pages(int order)
{
    int i;
    chunk_head_t *alloc_ch, *spare_ch;
    chunk_tail_t *spare_ct;

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i < FREELIST_SIZE; i++ ) {
        if ( !FREELIST_EMPTY(free_head[i]) )
            break;
    }

    if ( i == FREELIST_SIZE ) goto no_memory;

    /* Unlink a chunk. */
    alloc_ch = free_head[i];
    free_head[i] = alloc_ch->next;
    alloc_ch->next->pprev = alloc_ch->pprev;

    /* We may have to break the chunk a number of times. */
    while ( i != order )
    {
        /* Split into two equal parts. */
        i--;
        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;

        /* Create new header for spare chunk. */
        spare_ch->level = i;
        spare_ch->next  = free_head[i];
        spare_ch->pprev = &free_head[i];
        spare_ct->level = i;

        /* Link in the spare chunk. */
        spare_ch->next->pprev = &spare_ch->next;
        free_head[i] = spare_ch;
    }

    map_alloc(to_phys(alloc_ch)>>PAGE_SHIFT, 1<<order);

    return((unsigned long)alloc_ch);

 no_memory:

    printk("Cannot handle page request order %d!\n", order);

    return 0;
}
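
/*
 * Worked example (editorial addition): alloc_pages(0) when the smallest
 * non-empty freelist holds an order-2 chunk (4 pages). The while loop
 * splits it twice -- order 2 into two order-1 halves (upper half
 * requeued), order 1 into two order-0 halves (upper half requeued) --
 * then the remaining single page is marked in the bitmap and its
 * virtual address returned.
 */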

void free_pages(void *pointer, int order)
{
    chunk_head_t *freed_ch, *to_merge_ch;
    chunk_tail_t *freed_ct;
    unsigned long mask;

    /* First free the chunk */
    map_free(virt_to_pfn(pointer), 1 << order);

    /* Create free chunk */
    freed_ch = (chunk_head_t *)pointer;
    freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;

    /* Now, possibly we can coalesce chunks together */
    while(order < FREELIST_SIZE)
    {
        mask = 1 << (order + PAGE_SHIFT);
        if((unsigned long)freed_ch & mask)
        {
            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
               to_merge_ch->level != order)
                break;

            /* Merge with predecessor */
            freed_ch = to_merge_ch;
        }
        else
        {
            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
               to_merge_ch->level != order)
                break;

            /* Merge with successor; tail sits in the merged chunk's
               last bytes */
            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask)-1;
        }

        /* We are committed to merging, unlink the chunk */
        *(to_merge_ch->pprev) = to_merge_ch->next;
        to_merge_ch->next->pprev = to_merge_ch->pprev;

        order++;
    }

    /* Link the new chunk */
    freed_ch->level = order;
    freed_ch->next  = free_head[order];
    freed_ch->pprev = &free_head[order];
    freed_ct->level = order;

    freed_ch->next->pprev = &freed_ch->next;
    free_head[order] = freed_ch;

}
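
/*
 * Note (editorial addition): the buddy of a chunk of order n lies
 * mask == 2^(n+PAGE_SHIFT) bytes below or above it, selected by testing
 * bit (n+PAGE_SHIFT) of the chunk's address. Coalescing stops as soon
 * as the buddy is still allocated or is free at a different order, so
 * the loop preserves the buddy invariant.
 */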

void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
                  unsigned long offset, unsigned long level)
{
    unsigned long *tab = (unsigned long *)start_info.pt_base;
    unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn);
    unsigned long prot_e, prot_t, pincmd;
    mmu_update_t mmu_updates[1];
    struct mmuext_op pin_request;

    DEBUG("Allocating new L%lu pt frame for pt_pfn=%lx, "
          "prev_l_mfn=%lx, offset=%lx\n",
          level, *pt_pfn, prev_l_mfn, offset);

    /* We need to clear the page, otherwise we might fail to map it
       as a page table page */
    memset((unsigned long*)pfn_to_virt(*pt_pfn), 0, PAGE_SIZE);

    if (level == L1_FRAME)
    {
        prot_e = L1_PROT;
        prot_t = L2_PROT;
        pincmd = MMUEXT_PIN_L1_TABLE;
    }
#if (defined __x86_64__)
    else if (level == L2_FRAME)
    {
        prot_e = L2_PROT;
        prot_t = L3_PROT;
        pincmd = MMUEXT_PIN_L2_TABLE;
    }
    else if (level == L3_FRAME)
    {
        prot_e = L3_PROT;
        prot_t = L4_PROT;
        pincmd = MMUEXT_PIN_L3_TABLE;
    }
#endif
    else
    {
        printk("new_pt_frame() called with invalid level number %lu\n", level);
        do_exit();
    }

    /* Update the entry */
#if (defined __x86_64__)
    tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
    tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
#endif
    mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) +
                         sizeof(void *) * l1_table_offset(pt_page);
    mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
                         (prot_e & ~_PAGE_RW);
    if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
    {
        printk("PTE for new page table page could not be updated\n");
        do_exit();
    }

    /* Pin the page to provide correct protection */
    pin_request.cmd = pincmd;
    pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn);
    if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
    {
        printk("ERROR: pinning failed\n");
        do_exit();
    }

    /* Now fill the new page table page with entries.
       Update the page directory as well. */
    mmu_updates[0].ptr = (prev_l_mfn << PAGE_SHIFT) + sizeof(void *) * offset;
    mmu_updates[0].val = pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
    if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
    {
        printk("ERROR: mmu_update failed\n");
        do_exit();
    }

    *pt_pfn += 1;
}
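
/*
 * Summary (editorial addition): a fresh frame becomes a page table in
 * three hypercall-mediated steps: (1) its existing mapping is
 * downgraded to read-only (Xen refuses writable mappings of active
 * page tables), (2) the frame is pinned at the requested level, and
 * (3) it is hooked into the parent table at `offset'. *pt_pfn is then
 * advanced to the next spare frame.
 */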

void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
    unsigned long start_address, end_address;
    unsigned long pfn_to_map, pt_pfn = *start_pfn;
    static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
    unsigned long *tab = (unsigned long *)start_info.pt_base;
    unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
    unsigned long page, offset;
    int count = 0;

#if defined(__x86_64__)
    pfn_to_map = (start_info.nr_pt_frames - 3) * L1_PAGETABLE_ENTRIES;
#else
    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
#endif
    start_address = (unsigned long)pfn_to_virt(pfn_to_map);
    end_address = (unsigned long)pfn_to_virt(*max_pfn);

    /* We worked out the virtual memory range to map, now mapping loop */
    printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address);

    while(start_address < end_address)
    {
        tab = (unsigned long *)start_info.pt_base;
        mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));

#if defined(__x86_64__)
        offset = l4_table_offset(start_address);
        /* Need new L3 pt frame */
        if(!(start_address & L3_MASK))
            new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME);

        page = tab[offset];
        mfn = pte_to_mfn(page);
        tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
        offset = l3_table_offset(start_address);
        /* Need new L2 pt frame */
        if(!(start_address & L2_MASK))
            new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME);

        page = tab[offset];
        mfn = pte_to_mfn(page);
        tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
#endif
        offset = l2_table_offset(start_address);
        /* Need new L1 pt frame */
        if(!(start_address & L1_MASK))
            new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME);

        page = tab[offset];
        mfn = pte_to_mfn(page);
        offset = l1_table_offset(start_address);

        mmu_updates[count].ptr = (mfn << PAGE_SHIFT) + sizeof(void *) * offset;
        mmu_updates[count].val =
            pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
        count++;
        if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn)
        {
            if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
            {
                printk("PTE could not be updated\n");
                do_exit();
            }
            count = 0;
        }
        start_address += PAGE_SIZE;
    }

    *start_pfn = pt_pfn;
}
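
/*
 * Note (editorial addition): PTE writes are batched -- up to
 * L1_PAGETABLE_ENTRIES mmu_update_t records per hypercall -- so an
 * entire L1 table's worth of mappings costs a single trap into Xen
 * rather than one per page.
 */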

void mem_test(unsigned long *start_add, unsigned long *end_add)
{
    unsigned long mask = 0x10000;
    unsigned long *pointer;

    for(pointer = start_add; pointer < end_add; pointer++)
    {
        if(!(((unsigned long)pointer) & 0xfffff))
        {
            printk("Writing to %lx\n", (unsigned long)pointer);
            page_walk((unsigned long)pointer);
        }
        *pointer = (unsigned long)pointer & ~mask;
    }

    for(pointer = start_add; pointer < end_add; pointer++)
    {
        if(((unsigned long)pointer & ~mask) != *pointer)
            printk("Read error at 0x%lx. Read: 0x%lx, should read 0x%lx\n",
                   (unsigned long)pointer,
                   *pointer,
                   ((unsigned long)pointer & ~mask));
    }

}

void init_mm(void)
{

    unsigned long start_pfn, max_pfn;

    printk("MM: Init\n");

    printk("  _text:        %p\n", &_text);
    printk("  _etext:       %p\n", &_etext);
    printk("  _edata:       %p\n", &_edata);
    printk("  stack start:  %p\n", &stack);
    printk("  _end:         %p\n", &_end);

    /* set up minimal memory info */
    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;

    /* First page follows page table pages and 3 more pages (store page etc) */
    start_pfn = PFN_UP(to_phys(start_info.pt_base)) +
                start_info.nr_pt_frames + 3;
    max_pfn = start_info.nr_pages;

    printk("  start_pfn:    %lx\n", start_pfn);
    printk("  max_pfn:      %lx\n", max_pfn);

    build_pagetable(&start_pfn, &max_pfn);

    /*
     * now we can initialise the page allocator
     */
    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
           (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn),
           (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
    printk("MM: done\n");
}
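
/*
 * Usage sketch (editorial addition, not part of the original file): a
 * minimal, hypothetical self-test showing the allocator's public
 * interface after init_mm() has run.
 */
#ifdef MM_DEBUG
static void mm_selftest(void)
{
    /* Request four contiguous pages (order 2). */
    unsigned long va = alloc_pages(2);
    if ( va == 0 )
    {
        printk("mm_selftest: alloc_pages(2) failed\n");
        return;
    }
    /* Touch every byte, then hand the pages back to the buddy pool. */
    memset((void *)va, 0xaa, 4 * PAGE_SIZE);
    free_pages((void *)va, 2);
}
#endif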