ia64/xen-unstable

view extras/mini-os/mm.c @ 6435:b4b3f6be5226

merge?
author cl349@firebug.cl.cam.ac.uk
date Thu Aug 25 17:27:49 2005 +0000 (2005-08-25)
parents 0610add7c3fe 98a6eb458c78
children 8799d14bef77 9312a3e8a6f8
/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
 ****************************************************************************
 * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: mm.c
 *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
 *     Changes: Grzegorz Milos
 *
 *        Date: Aug 2003, changes Aug 2005
 *
 * Environment: Xen Minimal OS
 * Description: memory management related functions
 *              contains buddy page allocator from Xen.
 *
 ****************************************************************************
 * $Id: c-insert.c,v 1.7 2002/11/08 16:04:34 rn Exp $
 ****************************************************************************
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <os.h>
#include <hypervisor.h>
#include <mm.h>
#include <types.h>
#include <lib.h>

#ifdef MM_DEBUG
#define DEBUG(_f, _a...) \
    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
#else
#define DEBUG(_f, _a...)    ((void)0)
#endif

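/*
 * Editorial example: with MM_DEBUG defined, a call such as
 * DEBUG("got %lu frames", n) prints
 *     MINI_OS(file=mm.c, line=NNN) got <n> frames
 * where NNN is the call site's __LINE__; without MM_DEBUG it compiles
 * away to ((void)0).
 */
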
unsigned long *phys_to_machine_mapping;
extern char *stack;
extern char _text, _etext, _edata, _end;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

/* 1UL keeps the shift unsigned and as wide as the bitmap word. */
#define allocated_in_map(_pn) \
(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1UL<<n)  sets all bits >= n.
 *  (1UL<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */

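/*
 * Worked example (editorial illustration, with 32-bit words so
 * PAGES_PER_MAPWORD = 32): marking pages 35..37 allocated gives
 * first_page = 35, nr_pages = 3, hence curr_idx = end_idx = 1,
 * start_off = 3, end_off = 6, and the single-word mask is
 * ((1UL<<6)-1) & -(1UL<<3) = 0x3f & ~0x07 = 0x38, i.e. bits 3-5 of
 * alloc_bitmap[1]. allocated_in_map(36) then tests bit 4 of word 1.
 */
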
static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}

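/*
 * Editorial sketch (not part of the original file): a minimal MM_DEBUG
 * self-check for the bitmap helpers above. It assumes alloc_bitmap has
 * already been set up by init_page_allocator() and that the three pages
 * starting at @first_page are currently free.
 */
#ifdef MM_DEBUG
static void map_selftest(unsigned long first_page)
{
    map_alloc(first_page, 3);
    if ( !allocated_in_map(first_page + 2) )
        DEBUG("map_selftest: expected page %lu allocated", first_page + 2);
    map_free(first_page, 3);
    if ( allocated_in_map(first_page) )
        DEBUG("map_selftest: expected page %lu free", first_page);
}
#endif
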
/*************************
 * BINARY BUDDY ALLOCATOR
 */

typedef struct chunk_head_st chunk_head_t;
typedef struct chunk_tail_st chunk_tail_t;

struct chunk_head_st {
    chunk_head_t  *next;
    chunk_head_t **pprev;
    int            level;
};

struct chunk_tail_st {
    int level;
};

/* Linked lists of free chunks of different powers-of-two in size. */
#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
static chunk_head_t *free_head[FREELIST_SIZE];
static chunk_head_t  free_tail[FREELIST_SIZE];
#define FREELIST_EMPTY(_l) ((_l)->next == NULL)

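/*
 * Illustrative arithmetic (editorial note): on i386 with 4kB pages,
 * FREELIST_SIZE = (4<<3) - 12 = 20, so list i holds free chunks of 2^i
 * pages, up to 2^19 pages (2GB). Each list ends in its free_tail sentinel,
 * whose next pointer is NULL -- exactly what FREELIST_EMPTY tests.
 */
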
#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

#ifdef MM_DEBUG
/*
 * Prints the allocation bitmap (0/1 per page) for @nr_pages, starting at
 * @start address (virtual).
 */
static void print_allocation(void *start, int nr_pages)
{
    unsigned long pfn_start = virt_to_pfn(start);
    int count;

    for(count = 0; count < nr_pages; count++)
        if(allocated_in_map(pfn_start + count)) printk("1");
        else printk("0");

    printk("\n");
}

/*
 * Prints chunks (marking them with letters) for @nr_pages starting
 * at @start (virtual).
 */
static void print_chunks(void *start, int nr_pages)
{
    char chunks[1001], current='A';
    int order, count;
    chunk_head_t *head;
    unsigned long pfn_start = virt_to_pfn(start);

    memset(chunks, (int)'_', 1000);
    if(nr_pages > 1000)
    {
        DEBUG("Can only print 1000 pages. Increase buffer size.");
        nr_pages = 1000;   /* clamp to avoid overrunning chunks[] below */
    }

    for(order=0; order < FREELIST_SIZE; order++)
    {
        head = free_head[order];
        while(!FREELIST_EMPTY(head))
        {
            for(count = 0; count < 1 << head->level; count++)
            {
                if(count + virt_to_pfn(head) - pfn_start < 1000)
                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
            }
            head = head->next;
            current++;
        }
    }
    chunks[nr_pages] = '\0';
    printk("%s\n", chunks);
}
#endif

/*
 * Initialise allocator, placing addresses [@min,@max] in free pool.
 * @min and @max are PHYSICAL addresses.
 */
static void init_page_allocator(unsigned long min, unsigned long max)
{
    int i;
    unsigned long range, bitmap_size;
    chunk_head_t *ch;
    chunk_tail_t *ct;

    for ( i = 0; i < FREELIST_SIZE; i++ )
    {
        free_head[i]       = &free_tail[i];
        free_tail[i].pprev = &free_head[i];
        free_tail[i].next  = NULL;
    }

    min = round_pgup  (min);
    max = round_pgdown(max);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)to_virt(min);
    min         += bitmap_size;
    range        = max - min;

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);
    /* Free up the memory we've been given to play with. */
    map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);

    /* The buddy lists are addressed in high memory. */
    min += VIRT_START;
    max += VIRT_START;

    while ( range != 0 )
    {
        /*
         * Next chunk is limited by alignment of min, but also
         * must not be bigger than remaining range.
         */
        for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ )
            if ( min & (1UL<<i) ) break;

        ch = (chunk_head_t *)min;
        min   += (1UL<<i);
        range -= (1UL<<i);
        ct = (chunk_tail_t *)min-1;
        i -= PAGE_SHIFT;
        ch->level       = i;
        ch->next        = free_head[i];
        ch->pprev       = &free_head[i];
        ch->next->pprev = &ch->next;
        free_head[i]    = ch;
        ct->level       = i;
    }
}

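/*
 * Worked example (editorial illustration, 4kB pages): after the bitmap is
 * reserved, suppose min = 0x5000 and range = 0x7000 (max = 0xC000). The
 * alignment loop above carves the range into maximal aligned chunks:
 *   4kB  at 0x5000 (4kB- but not 8kB-aligned),
 *   8kB  at 0x6000 (8kB- but not 16kB-aligned),
 *   16kB at 0x8000 (16kB-aligned and exactly the remaining range),
 * which land on free lists of order 0, 1 and 2 respectively.
 */
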
/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */
unsigned long alloc_pages(int order)
{
    int i;
    chunk_head_t *alloc_ch, *spare_ch;
    chunk_tail_t *spare_ct;

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i < FREELIST_SIZE; i++ ) {
        if ( !FREELIST_EMPTY(free_head[i]) )
            break;
    }

    if ( i == FREELIST_SIZE ) goto no_memory;

    /* Unlink a chunk. */
    alloc_ch = free_head[i];
    free_head[i] = alloc_ch->next;
    alloc_ch->next->pprev = alloc_ch->pprev;

    /* We may have to break the chunk a number of times. */
    while ( i != order )
    {
        /* Split into two equal parts. */
        i--;
        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT)));
        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1;

        /* Create new header for spare chunk. */
        spare_ch->level = i;
        spare_ch->next  = free_head[i];
        spare_ch->pprev = &free_head[i];
        spare_ct->level = i;

        /* Link in the spare chunk. */
        spare_ch->next->pprev = &spare_ch->next;
        free_head[i] = spare_ch;
    }

    map_alloc(to_phys(alloc_ch)>>PAGE_SHIFT, 1UL<<order);

    return((unsigned long)alloc_ch);

no_memory:

    printk("Cannot handle page request order %d!\n", order);

    return 0;
}

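/*
 * Illustrative usage (editorial note, not in the original source):
 *
 *     unsigned long addr = alloc_pages(2);    // 2^2 = 4 contiguous pages
 *     if ( addr != 0 )
 *         free_pages((void *)addr, 2);        // order must match the alloc
 *
 * A request of order 2 served from a list holding only an order-4 chunk
 * splits that chunk twice, leaving spare chunks of order 3 and order 2 on
 * their respective free lists.
 */
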
void free_pages(void *pointer, int order)
{
    chunk_head_t *freed_ch, *to_merge_ch;
    chunk_tail_t *freed_ct;
    unsigned long mask;

    /* First free the chunk */
    map_free(virt_to_pfn(pointer), 1UL << order);

    /* Create free chunk */
    freed_ch = (chunk_head_t *)pointer;
    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT)))-1;

    /* Now, possibly we can coalesce chunks together */
    while(order < FREELIST_SIZE)
    {
        mask = 1UL << (order + PAGE_SHIFT);
        if((unsigned long)freed_ch & mask)
        {
            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
                    to_merge_ch->level != order)
                break;

            /* Merge with predecessor */
            freed_ch = to_merge_ch;
        }
        else
        {
            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
                    to_merge_ch->level != order)
                break;

            /* Merge with successor */
            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask);
        }

        /* We are committed to merging, unlink the chunk */
        *(to_merge_ch->pprev) = to_merge_ch->next;
        to_merge_ch->next->pprev = to_merge_ch->pprev;

        order++;
    }

    /* Link the new chunk */
    freed_ch->level = order;
    freed_ch->next  = free_head[order];
    freed_ch->pprev = &free_head[order];
    freed_ct->level = order;

    freed_ch->next->pprev = &freed_ch->next;
    free_head[order] = freed_ch;
}

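/*
 * Editorial note on the buddy test above: for a chunk of order k at
 * virtual address a, its buddy sits at a ^ (1UL << (k + PAGE_SHIFT)), so
 * a non-zero `a & mask` means the buddy precedes the chunk. For example,
 * freeing an order-0 page at 0x6000 (mask 0x1000, bit clear) probes the
 * successor at 0x7000; if that merges, the resulting order-1 chunk at
 * 0x6000 (mask 0x2000, bit set) next probes the predecessor at 0x4000.
 */
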
void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
    unsigned long pfn_to_map, pt_frame;
    unsigned long mach_ptd, max_mach_ptd;
    int count;
    unsigned long mach_pte, virt_pte;
    unsigned long *ptd = (unsigned long *)start_info.pt_base;
    mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
    struct mmuext_op pin_request;

    /* First work out the first pfn that is not yet covered by the page
       tables. NB. This assumes the domain builder fills whole pt_frames
       (which it does at the moment). */
    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
    DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld",
            *start_pfn, pfn_to_map, *max_pfn);

    /* Machine address of page table directory */
    mach_ptd  = phys_to_machine(to_phys(start_info.pt_base));
    mach_ptd += sizeof(void *) *
        l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map)));

    max_mach_ptd = sizeof(void *) *
        l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn)));

    /* Check that we are not trying to access the Xen region */
    if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START))
    {
        printk("WARNING: mini-os will not use all the memory supplied\n");
        max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START);
        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
    }
    max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base));
    DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd);

    pt_frame = *start_pfn;
    /* Should not happen - no empty, mapped pages */
    if(pt_frame >= pfn_to_map)
    {
        printk("ERROR: Not even a single empty, mapped page\n");
        *(int*)0=0;   /* deliberate crash: unrecoverable (also below) */
    }

    while(mach_ptd < max_mach_ptd)
    {
        /* Correct protection needs to be set for the new page table frame:
           it must be mapped read-only before Xen will pin it. */
        virt_pte  = (unsigned long)to_virt(PFN_PHYS(pt_frame));
        mach_pte  = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1);
        mach_pte += sizeof(void *) * l1_table_offset(virt_pte);
        DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, "
                "mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame),
                virt_pte, mach_pte);

        /* Update the entry */
        mmu_updates[0].ptr = mach_pte;
        mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
            (L1_PROT & ~_PAGE_RW);
        if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
        {
            printk("PTE for new page table page could not be updated\n");
            *(int*)0=0;
        }

        /* Pin the page to provide correct protection */
        pin_request.cmd = MMUEXT_PIN_L1_TABLE;
        pin_request.mfn = pfn_to_mfn(pt_frame);
        if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
        {
            printk("ERROR: pinning failed\n");
            *(int*)0=0;
        }

        /* Now fill the new page table page with entries.
           Update the page directory as well. */
        count = 0;
        mmu_updates[count].ptr = mach_ptd;
        mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
            L2_PROT;
        count++;
        mach_ptd += sizeof(void *);
        mach_pte = phys_to_machine(PFN_PHYS(pt_frame++));

        for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++)
        {
            mmu_updates[count].ptr = mach_pte;
            mmu_updates[count].val =
                pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
            if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte);
            mach_pte += sizeof(void *);
        }
        if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
        {
            printk("ERROR: mmu_update failed\n");
            *(int*)0=0;
        }
        (*start_pfn)++;
    }

    *start_pfn = pt_frame;
}

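/*
 * Editorial summary of the per-frame protocol above: each new L1 frame is
 * (1) remapped read-only via HYPERVISOR_mmu_update, since Xen will not pin
 * a page table the guest can still write, (2) pinned as an L1 table with
 * MMUEXT_PIN_L1_TABLE via HYPERVISOR_mmuext_op, and (3) filled and hooked
 * into the page directory with a single batched HYPERVISOR_mmu_update call
 * of up to L1_PAGETABLE_ENTRIES + 1 updates.
 */
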
void init_mm(void)
{
    unsigned long start_pfn, max_pfn;

    printk("MM: Init\n");

    printk("  _text:        %p\n", &_text);
    printk("  _etext:       %p\n", &_etext);
    printk("  _edata:       %p\n", &_edata);
    printk("  stack start:  %p\n", &stack);
    printk("  _end:         %p\n", &_end);

    /* set up minimal memory info */
    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;

    /* First page follows page table pages and 3 more pages (store page etc) */
    start_pfn = PFN_UP(__pa(start_info.pt_base)) + start_info.nr_pt_frames + 3;
    max_pfn = start_info.nr_pages;

    printk("  start_pfn:    %lx\n", start_pfn);
    printk("  max_pfn:      %lx\n", max_pfn);

    build_pagetable(&start_pfn, &max_pfn);

#ifdef __i386__
    /*
     * now we can initialise the page allocator
     */
    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
            (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn),
            (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
#endif

    printk("MM: done\n");
}