ia64/xen-unstable

view extras/mini-os/mm.c @ 6685:549f4256ab3c

Remove control interface in kernels and domain builder.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Wed Sep 07 17:43:56 2005 +0000 (2005-09-07)
parents dd668f7527cb
children df1348e72390 cdfa7dd00c44
line source
1 /*
2 ****************************************************************************
3 * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
4 * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
5 ****************************************************************************
6 *
7 * File: mm.c
8 * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
9 * Changes: Grzegorz Milos
10 *
11 * Date: Aug 2003, changes Aug 2005
12 *
13 * Environment: Xen Minimal OS
14 * Description: memory management related functions
15 * contains buddy page allocator from Xen.
16 *
17 ****************************************************************************
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this software and associated documentation files (the "Software"), to
20 * deal in the Software without restriction, including without limitation the
21 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
22 * sell copies of the Software, and to permit persons to whom the Software is
23 * furnished to do so, subject to the following conditions:
24 *
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
34 * DEALINGS IN THE SOFTWARE.
35 */
37 #include <os.h>
38 #include <hypervisor.h>
39 #include <mm.h>
40 #include <types.h>
41 #include <lib.h>
42 #include <xmalloc.h>
/*
 * DEBUG(fmt, args...)
 *
 * With MM_DEBUG defined, prints the message through printk() prefixed with
 * the file name and the line number of the call site, and appends a newline.
 * Without MM_DEBUG it expands to a no-op expression and its arguments are
 * never evaluated.
 */
#ifndef MM_DEBUG
#define DEBUG(_f, ...) ((void)0)
#else
#define DEBUG(_f, ...) \
    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## __VA_ARGS__)
#endif
/* Pointed at start_info.mfn_list by init_mm(). */
unsigned long *phys_to_machine_mapping;

/*
 * Symbols defined outside this file; init_mm() prints their addresses.
 * NOTE(review): presumably provided by the boot code / linker script.
 */
extern char *stack;
extern char _text;
extern char _etext;
extern char _edata;
extern char _end;
/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

/*
 * Non-zero iff page frame @_pn is marked allocated in the bitmap.
 * The bit is built from 1UL so that the shift is performed at the width of
 * a bitmap word; a plain int `1' cannot be shifted into the upper bits of a
 * word wider than int.
 */
#define allocated_in_map(_pn) \
    (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
     (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1UL<<n)  sets all bits >= n.
 *  (1UL<<n)-1 sets all bits <  n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */

/*
 * Mark the @nr_pages page frames starting at @first_page as allocated
 * (sets bits [first_page, first_page + nr_pages) in alloc_bitmap).
 */
static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        /* Whole range lies inside a single bitmap word. */
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        /* Tail of the first word, all of the middle words... */
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        /*
         * ...and the head of the last word. When the range ends exactly on
         * a word boundary there is nothing to set, and the word at end_idx
         * may lie beyond the bitmap, so do not touch it.
         */
        if ( end_off != 0 )
            alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

/*
 * Mark the @nr_pages page frames starting at @first_page as free
 * (clears bits [first_page, first_page + nr_pages) in alloc_bitmap).
 */
static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        /* Keep the bits at or above end_off and the bits below start_off. */
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        /* Skip the last word when the range ends on a word boundary. */
        if ( end_off != 0 )
            alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}
122 /*************************
123 * BINARY BUDDY ALLOCATOR
124 */
126 typedef struct chunk_head_st chunk_head_t;
127 typedef struct chunk_tail_st chunk_tail_t;
129 struct chunk_head_st {
130 chunk_head_t *next;
131 chunk_head_t **pprev;
132 int level;
133 };
135 struct chunk_tail_st {
136 int level;
137 };
139 /* Linked lists of free chunks of different powers-of-two in size. */
140 #define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
141 static chunk_head_t *free_head[FREELIST_SIZE];
142 static chunk_head_t free_tail[FREELIST_SIZE];
143 #define FREELIST_EMPTY(_l) ((_l)->next == NULL)
145 #define round_pgdown(_p) ((_p)&PAGE_MASK)
146 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
148 #ifdef MM_DEBUG
/*
 * Debug helper: prints one character per page for @nr_pages pages starting
 * at virtual address @start -- '1' if the page is marked allocated in the
 * bitmap, '0' otherwise -- followed by a newline.
 */
static void print_allocation(void *start, int nr_pages)
{
    unsigned long first_pfn = virt_to_pfn(start);
    int idx = 0;

    while ( idx < nr_pages )
    {
        if ( allocated_in_map(first_pfn + idx) )
        {
            printk("1");
        }
        else
        {
            printk("0");
        }
        idx++;
    }

    printk("\n");
}
164 /*
165 * Prints chunks (making them with letters) for @nr_pages starting
166 * at @start (virtual).
167 */
168 static void print_chunks(void *start, int nr_pages)
169 {
170 char chunks[1001], current='A';
171 int order, count;
172 chunk_head_t *head;
173 unsigned long pfn_start = virt_to_pfn(start);
175 memset(chunks, (int)'_', 1000);
176 if(nr_pages > 1000)
177 {
178 DEBUG("Can only pring 1000 pages. Increase buffer size.");
179 }
181 for(order=0; order < FREELIST_SIZE; order++)
182 {
183 head = free_head[order];
184 while(!FREELIST_EMPTY(head))
185 {
186 for(count = 0; count < 1<< head->level; count++)
187 {
188 if(count + virt_to_pfn(head) - pfn_start < 1000)
189 chunks[count + virt_to_pfn(head) - pfn_start] = current;
190 }
191 head = head->next;
192 current++;
193 }
194 }
195 chunks[nr_pages] = '\0';
196 printk("%s\n", chunks);
197 }
198 #endif
202 /*
203 * Initialise allocator, placing addresses [@min,@max] in free pool.
204 * @min and @max are PHYSICAL addresses.
205 */
206 static void init_page_allocator(unsigned long min, unsigned long max)
207 {
208 int i;
209 unsigned long range, bitmap_size;
210 chunk_head_t *ch;
211 chunk_tail_t *ct;
213 for ( i = 0; i < FREELIST_SIZE; i++ )
214 {
215 free_head[i] = &free_tail[i];
216 free_tail[i].pprev = &free_head[i];
217 free_tail[i].next = NULL;
218 }
220 min = round_pgup (min);
221 max = round_pgdown(max);
223 /* Allocate space for the allocation bitmap. */
224 bitmap_size = (max+1) >> (PAGE_SHIFT+3);
225 bitmap_size = round_pgup(bitmap_size);
226 alloc_bitmap = (unsigned long *)to_virt(min);
227 min += bitmap_size;
228 range = max - min;
230 /* All allocated by default. */
231 memset(alloc_bitmap, ~0, bitmap_size);
232 /* Free up the memory we've been given to play with. */
233 map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);
235 /* The buddy lists are addressed in high memory. */
236 min += VIRT_START;
237 max += VIRT_START;
239 while ( range != 0 )
240 {
241 /*
242 * Next chunk is limited by alignment of min, but also
243 * must not be bigger than remaining range.
244 */
245 for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
246 if ( min & (1<<i) ) break;
249 ch = (chunk_head_t *)min;
250 min += (1<<i);
251 range -= (1<<i);
252 ct = (chunk_tail_t *)min-1;
253 i -= PAGE_SHIFT;
254 ch->level = i;
255 ch->next = free_head[i];
256 ch->pprev = &free_head[i];
257 ch->next->pprev = &ch->next;
258 free_head[i] = ch;
259 ct->level = i;
260 }
261 }
264 /* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */
265 unsigned long alloc_pages(int order)
266 {
267 int i;
268 chunk_head_t *alloc_ch, *spare_ch;
269 chunk_tail_t *spare_ct;
272 /* Find smallest order which can satisfy the request. */
273 for ( i = order; i < FREELIST_SIZE; i++ ) {
274 if ( !FREELIST_EMPTY(free_head[i]) )
275 break;
276 }
278 if ( i == FREELIST_SIZE ) goto no_memory;
280 /* Unlink a chunk. */
281 alloc_ch = free_head[i];
282 free_head[i] = alloc_ch->next;
283 alloc_ch->next->pprev = alloc_ch->pprev;
285 /* We may have to break the chunk a number of times. */
286 while ( i != order )
287 {
288 /* Split into two equal parts. */
289 i--;
290 spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
291 spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
293 /* Create new header for spare chunk. */
294 spare_ch->level = i;
295 spare_ch->next = free_head[i];
296 spare_ch->pprev = &free_head[i];
297 spare_ct->level = i;
299 /* Link in the spare chunk. */
300 spare_ch->next->pprev = &spare_ch->next;
301 free_head[i] = spare_ch;
302 }
304 map_alloc(to_phys(alloc_ch)>>PAGE_SHIFT, 1<<order);
306 return((unsigned long)alloc_ch);
308 no_memory:
310 printk("Cannot handle page request order %d!\n", order);
312 return 0;
313 }
315 void free_pages(void *pointer, int order)
316 {
317 chunk_head_t *freed_ch, *to_merge_ch;
318 chunk_tail_t *freed_ct;
319 unsigned long mask;
321 /* First free the chunk */
322 map_free(virt_to_pfn(pointer), 1 << order);
324 /* Create free chunk */
325 freed_ch = (chunk_head_t *)pointer;
326 freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;
328 /* Now, possibly we can conseal chunks together */
329 while(order < FREELIST_SIZE)
330 {
331 mask = 1 << (order + PAGE_SHIFT);
332 if((unsigned long)freed_ch & mask)
333 {
334 to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
335 if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
336 to_merge_ch->level != order)
337 break;
339 /* Merge with predecessor */
340 freed_ch = to_merge_ch;
341 }
342 else
343 {
344 to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
345 if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
346 to_merge_ch->level != order)
347 break;
349 /* Merge with successor */
350 freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask);
351 }
353 /* We are commited to merging, unlink the chunk */
354 *(to_merge_ch->pprev) = to_merge_ch->next;
355 to_merge_ch->next->pprev = to_merge_ch->pprev;
357 order++;
358 }
360 /* Link the new chunk */
361 freed_ch->level = order;
362 freed_ch->next = free_head[order];
363 freed_ch->pprev = &free_head[order];
364 freed_ct->level = order;
366 freed_ch->next->pprev = &freed_ch->next;
367 free_head[order] = freed_ch;
369 }
370 void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
371 {
372 unsigned long pfn_to_map, pt_frame;
373 unsigned long mach_ptd, max_mach_ptd;
374 int count;
375 unsigned long mach_pte, virt_pte;
376 unsigned long *ptd = (unsigned long *)start_info.pt_base;
377 mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
378 struct mmuext_op pin_request;
380 /* Firstly work out what is the first pfn that is not yet in page tables
381 NB. Assuming that builder fills whole pt_frames (which it does at the
382 moment)
383 */
384 pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
385 DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld",
386 *start_pfn, pfn_to_map, *max_pfn);
388 /* Machine address of page table directory */
389 mach_ptd = phys_to_machine(to_phys(start_info.pt_base));
390 mach_ptd += sizeof(void *) *
391 l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map)));
393 max_mach_ptd = sizeof(void *) *
394 l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn)));
396 /* Check that we are not trying to access Xen region */
397 if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START))
398 {
399 printk("WARNING: mini-os will not use all the memory supplied\n");
400 max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START);
401 *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
402 }
403 max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base));
404 DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd);
406 pt_frame = *start_pfn;
407 /* Should not happen - no empty, mapped pages */
408 if(pt_frame >= pfn_to_map)
409 {
410 printk("ERROR: Not even a single empty, mapped page\n");
411 *(int*)0=0;
412 }
414 while(mach_ptd < max_mach_ptd)
415 {
416 /* Correct protection needs to be set for the new page table frame */
417 virt_pte = (unsigned long)to_virt(PFN_PHYS(pt_frame));
418 mach_pte = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1);
419 mach_pte += sizeof(void *) * l1_table_offset(virt_pte);
420 DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, "
421 "mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame),
422 virt_pte, mach_pte);
424 /* Update the entry */
425 mmu_updates[0].ptr = mach_pte;
426 mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
427 (L1_PROT & ~_PAGE_RW);
428 if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
429 {
430 printk("PTE for new page table page could not be updated\n");
431 *(int*)0=0;
432 }
434 /* Pin the page to provide correct protection */
435 pin_request.cmd = MMUEXT_PIN_L1_TABLE;
436 pin_request.mfn = pfn_to_mfn(pt_frame);
437 if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
438 {
439 printk("ERROR: pinning failed\n");
440 *(int*)0=0;
441 }
443 /* Now fill the new page table page with entries.
444 Update the page directory as well. */
445 count = 0;
446 mmu_updates[count].ptr = mach_ptd;
447 mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
448 L2_PROT;
449 count++;
450 mach_ptd += sizeof(void *);
451 mach_pte = phys_to_machine(PFN_PHYS(pt_frame++));
453 for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++)
454 {
455 mmu_updates[count].ptr = mach_pte;
456 mmu_updates[count].val =
457 pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
458 if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte);
459 mach_pte += sizeof(void *);
460 }
461 if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
462 {
463 printk("ERROR: mmu_update failed\n");
464 *(int*)0=0;
465 }
466 (*start_pfn)++;
467 }
469 *start_pfn = pt_frame;
470 }
472 void init_mm(void)
473 {
475 unsigned long start_pfn, max_pfn;
477 printk("MM: Init\n");
479 printk(" _text: %p\n", &_text);
480 printk(" _etext: %p\n", &_etext);
481 printk(" _edata: %p\n", &_edata);
482 printk(" stack start: %p\n", &stack);
483 printk(" _end: %p\n", &_end);
485 /* set up minimal memory infos */
486 phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;
488 /* First page follows page table pages and 3 more pages (store page etc) */
489 start_pfn = PFN_UP(__pa(start_info.pt_base)) + start_info.nr_pt_frames + 3;
490 max_pfn = start_info.nr_pages;
492 printk(" start_pfn: %lx\n", start_pfn);
493 printk(" max_pfn: %lx\n", max_pfn);
496 build_pagetable(&start_pfn, &max_pfn);
498 #ifdef __i386__
499 /*
500 * now we can initialise the page allocator
501 */
502 printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
503 (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn),
504 (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
505 init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
506 #endif
508 printk("MM: done\n");
509 }