ia64/xen-unstable

view extras/mini-os/mm.c @ 7238:971e7c7411b3

Raise an exception if an error appears on the pipes to our children, and make
sure that the child's pipes are closed even under that exception. Move the
handling of POLLHUP to the end of the loop, so that we guarantee to read any
remaining data from the child if POLLHUP and POLLIN appear at the same time.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Thu Oct 06 10:13:11 2005 +0100 (2005-10-06)
parents 06d84bf87159
children c176d2e45117
/*
 ****************************************************************************
 * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: mm.c
 *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
 *     Changes: Grzegorz Milos
 *
 *        Date: Aug 2003, changes Aug 2005
 *
 * Environment: Xen Minimal OS
 * Description: memory management related functions
 *              contains buddy page allocator from Xen.
 *
 ****************************************************************************
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <os.h>
#include <hypervisor.h>
#include <mm.h>
#include <types.h>
#include <lib.h>
#include <xmalloc.h>

#ifdef MM_DEBUG
#define DEBUG(_f, _a...) \
    printk("MINI_OS(file=mm.c, line=%d) " _f "\n", __LINE__, ## _a)
#else
#define DEBUG(_f, _a...)    ((void)0)
#endif
unsigned long *phys_to_machine_mapping;
extern char *stack;
extern char _text, _etext, _edata, _end;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn) \
(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n)  sets all bits >= n.
 *  (1<<n)-1 sets all bits <  n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
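
/*
 * Worked example (assuming 32-bit map words): for first_page = 3 and
 * nr_pages = 4 within a single word,
 *   start_off = 3, end_off = 7
 *   -(1<<start_off) = 0xfffffff8   (bits >= 3)
 *   (1<<end_off)-1  = 0x0000007f   (bits <  7)
 * and their intersection 0x00000078 sets exactly bits 3..6, i.e. four pages.
 * map_free() clears the same bits with -(1<<end_off) | ((1<<start_off)-1).
 */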

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1<<end_off);
    }
}

/*************************
 * BINARY BUDDY ALLOCATOR
 */

typedef struct chunk_head_st chunk_head_t;
typedef struct chunk_tail_st chunk_tail_t;

struct chunk_head_st {
    chunk_head_t  *next;
    chunk_head_t **pprev;
    int            level;
};

struct chunk_tail_st {
    int level;
};

/* Linked lists of free chunks of different powers-of-two in size. */
#define FREELIST_SIZE ((sizeof(void*)<<3)-PAGE_SHIFT)
static chunk_head_t *free_head[FREELIST_SIZE];
static chunk_head_t  free_tail[FREELIST_SIZE];
#define FREELIST_EMPTY(_l) ((_l)->next == NULL)

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
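
/*
 * Worked example (assuming a 32-bit build with PAGE_SHIFT == 12):
 * FREELIST_SIZE = 32 - 12 = 20, i.e. free lists for chunk orders 0..19
 * (4kB up to 2GB). With 4kB pages, round_pgdown(0x12345) == 0x12000 and
 * round_pgup(0x12345) == 0x13000.
 */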

#ifdef MM_DEBUG
/*
 * Prints allocation[0/1] for @nr_pages, starting at @start
 * address (virtual).
 */
static void print_allocation(void *start, int nr_pages)
{
    unsigned long pfn_start = virt_to_pfn(start);
    int count;
    for(count = 0; count < nr_pages; count++)
        if(allocated_in_map(pfn_start + count)) printk("1");
        else printk("0");

    printk("\n");
}

/*
 * Prints chunks (marking them with letters) for @nr_pages starting
 * at @start (virtual).
 */
static void print_chunks(void *start, int nr_pages)
{
    char chunks[1001], current='A';
    int order, count;
    chunk_head_t *head;
    unsigned long pfn_start = virt_to_pfn(start);

    memset(chunks, (int)'_', 1000);
    if(nr_pages > 1000)
    {
        DEBUG("Can only print 1000 pages. Increase buffer size.");
    }

    for(order=0; order < FREELIST_SIZE; order++)
    {
        head = free_head[order];
        while(!FREELIST_EMPTY(head))
        {
            for(count = 0; count < 1 << head->level; count++)
            {
                if(count + virt_to_pfn(head) - pfn_start < 1000)
                    chunks[count + virt_to_pfn(head) - pfn_start] = current;
            }
            head = head->next;
            current++;
        }
    }
    chunks[nr_pages] = '\0';
    printk("%s\n", chunks);
}
#endif

/*
 * Initialise allocator, placing addresses [@min,@max] in free pool.
 * @min and @max are PHYSICAL addresses.
 */
static void init_page_allocator(unsigned long min, unsigned long max)
{
    int i;
    unsigned long range, bitmap_size;
    chunk_head_t *ch;
    chunk_tail_t *ct;

    for ( i = 0; i < FREELIST_SIZE; i++ )
    {
        free_head[i]       = &free_tail[i];
        free_tail[i].pprev = &free_head[i];
        free_tail[i].next  = NULL;
    }

    min = round_pgup  (min);
    max = round_pgdown(max);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = (max+1) >> (PAGE_SHIFT+3);
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)to_virt(min);
    min         += bitmap_size;
    range        = max - min;

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);
    /* Free up the memory we've been given to play with. */
    map_free(min>>PAGE_SHIFT, range>>PAGE_SHIFT);

    /* The buddy lists are addressed in high memory. */
    min += VIRT_START;
    max += VIRT_START;

    while ( range != 0 )
    {
        /*
         * Next chunk is limited by alignment of min, but also
         * must not be bigger than remaining range.
         */
        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
            if ( min & (1<<i) ) break;
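
        /*
         * Worked example (hypothetical values): with min = 0x7000 and
         * range = 0x9000 the loop above picks i = 12 (min has bit 12 set),
         * carving a single 4kB chunk; on the next pass min = 0x8000 and
         * range = 0x8000 give i = 15, one 32kB chunk, and range reaches 0.
         */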

        ch = (chunk_head_t *)min;
        min   += (1<<i);
        range -= (1<<i);
        ct = (chunk_tail_t *)min-1;
        i -= PAGE_SHIFT;
        ch->level       = i;
        ch->next        = free_head[i];
        ch->pprev       = &free_head[i];
        ch->next->pprev = &ch->next;
        free_head[i]    = ch;
        ct->level = i;
    }
}

/* Allocate 2^@order contiguous pages. Returns a VIRTUAL address. */
unsigned long alloc_pages(int order)
{
    int i;
    chunk_head_t *alloc_ch, *spare_ch;
    chunk_tail_t *spare_ct;

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i < FREELIST_SIZE; i++ ) {
        if ( !FREELIST_EMPTY(free_head[i]) )
            break;
    }

    if ( i == FREELIST_SIZE ) goto no_memory;

    /* Unlink a chunk. */
    alloc_ch = free_head[i];
    free_head[i] = alloc_ch->next;
    alloc_ch->next->pprev = alloc_ch->pprev;

    /* We may have to break the chunk a number of times. */
    while ( i != order )
    {
        /* Split into two equal parts. */
        i--;
        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;

        /* Create new header for spare chunk. */
        spare_ch->level = i;
        spare_ch->next  = free_head[i];
        spare_ch->pprev = &free_head[i];
        spare_ct->level = i;

        /* Link in the spare chunk. */
        spare_ch->next->pprev = &spare_ch->next;
        free_head[i] = spare_ch;
    }
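
    /*
     * Worked example (hypothetical state): a request for order 0 when only
     * an order-3 chunk is free splits it three times, leaving spare chunks
     * of orders 2, 1 and 0 on their free lists and returning the first page.
     */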

    map_alloc(to_phys(alloc_ch)>>PAGE_SHIFT, 1<<order);

    return((unsigned long)alloc_ch);

 no_memory:

    printk("Cannot handle page request order %d!\n", order);

    return 0;
}

void free_pages(void *pointer, int order)
{
    chunk_head_t *freed_ch, *to_merge_ch;
    chunk_tail_t *freed_ct;
    unsigned long mask;

    /* First free the chunk */
    map_free(virt_to_pfn(pointer), 1 << order);

    /* Create free chunk */
    freed_ch = (chunk_head_t *)pointer;
    freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;

    /* Now, possibly we can coalesce chunks together */
    while(order < FREELIST_SIZE)
    {
        mask = 1 << (order + PAGE_SHIFT);
        if((unsigned long)freed_ch & mask)
        {
            to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
                    to_merge_ch->level != order)
                break;

            /* Merge with predecessor */
            freed_ch = to_merge_ch;
        }
        else
        {
            to_merge_ch = (chunk_head_t *)((char *)freed_ch + mask);
            if(allocated_in_map(virt_to_pfn(to_merge_ch)) ||
                    to_merge_ch->level != order)
                break;

            /* Merge with successor */
            freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask);
        }

        /* We are committed to merging, unlink the chunk */
        *(to_merge_ch->pprev) = to_merge_ch->next;
        to_merge_ch->next->pprev = to_merge_ch->pprev;

        order++;
    }
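
    /*
     * Worked example (hypothetical state): freeing an order-0 page whose
     * buddy (the neighbouring page at address ^ PAGE_SIZE) is already free
     * at order 0 merges the pair into an order-1 chunk; merging repeats
     * until a buddy is found allocated or holding a different order.
     */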

    /* Link the new chunk */
    freed_ch->level = order;
    freed_ch->next  = free_head[order];
    freed_ch->pprev = &free_head[order];
    freed_ct->level = order;

    freed_ch->next->pprev = &freed_ch->next;
    free_head[order] = freed_ch;
}
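
/*
 * Usage sketch (hypothetical caller, not taken from this file): allocate
 * 2^2 = 4 contiguous pages and release them again with a matching order.
 *
 *   unsigned long va = alloc_pages(2);
 *   if ( va != 0 )
 *       free_pages((void *)va, 2);
 */

/*
 * Map the physical memory beyond the region set up by the domain builder:
 * each new page-table frame is taken from *start_pfn, has its own mapping
 * made read-only, is pinned as an L1 table, and is then hooked into the
 * page directory and filled with entries up to *max_pfn (clamped below the
 * Xen-reserved region).
 */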
void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
    unsigned long pfn_to_map, pt_frame;
    unsigned long mach_ptd, max_mach_ptd;
    int count;
    unsigned long mach_pte, virt_pte;
    unsigned long *ptd = (unsigned long *)start_info.pt_base;
    mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
    struct mmuext_op pin_request;

    /* First work out the first pfn that is not yet in the page tables.
       NB. Assumes that the builder fills whole pt_frames (which it does
       at the moment). */
    pfn_to_map = (start_info.nr_pt_frames - 1) * L1_PAGETABLE_ENTRIES;
    DEBUG("start_pfn=%ld, first pfn_to_map %ld, max_pfn=%ld",
          *start_pfn, pfn_to_map, *max_pfn);

    /* Machine address of page table directory */
    mach_ptd  = phys_to_machine(to_phys(start_info.pt_base));
    mach_ptd += sizeof(void *) *
        l2_table_offset((unsigned long)to_virt(PFN_PHYS(pfn_to_map)));

    max_mach_ptd = sizeof(void *) *
        l2_table_offset((unsigned long)to_virt(PFN_PHYS(*max_pfn)));

    /* Check that we are not trying to access Xen region */
    if(max_mach_ptd > sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START))
    {
        printk("WARNING: mini-os will not use all the memory supplied\n");
        max_mach_ptd = sizeof(void *) * l2_table_offset(HYPERVISOR_VIRT_START);
        *max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
    }
    max_mach_ptd += phys_to_machine(to_phys(start_info.pt_base));
    DEBUG("Max_mach_ptd 0x%lx", max_mach_ptd);

    pt_frame = *start_pfn;
    /* Should not happen - no empty, mapped pages */
    if(pt_frame >= pfn_to_map)
    {
        printk("ERROR: Not even a single empty, mapped page\n");
        *(int*)0=0;
    }

    while(mach_ptd < max_mach_ptd)
    {
        /* Correct protection needs to be set for the new page table frame */
        virt_pte = (unsigned long)to_virt(PFN_PHYS(pt_frame));
        mach_pte = ptd[l2_table_offset(virt_pte)] & ~(PAGE_SIZE-1);
        mach_pte += sizeof(void *) * l1_table_offset(virt_pte);
        DEBUG("New page table page: pfn=0x%lx, mfn=0x%lx, virt_pte=0x%lx, "
              "mach_pte=0x%lx", pt_frame, pfn_to_mfn(pt_frame),
              virt_pte, mach_pte);

        /* Update the entry */
        mmu_updates[0].ptr = mach_pte;
        mmu_updates[0].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
            (L1_PROT & ~_PAGE_RW);
        if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
        {
            printk("PTE for new page table page could not be updated\n");
            *(int*)0=0;
        }

        /* Pin the page to provide correct protection */
        pin_request.cmd = MMUEXT_PIN_L1_TABLE;
        pin_request.arg1.mfn = pfn_to_mfn(pt_frame);
        if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
        {
            printk("ERROR: pinning failed\n");
            *(int*)0=0;
        }

        /* Now fill the new page table page with entries.
           Update the page directory as well. */
        count = 0;
        mmu_updates[count].ptr = mach_ptd;
        mmu_updates[count].val = pfn_to_mfn(pt_frame) << PAGE_SHIFT |
            L2_PROT;
        count++;
        mach_ptd += sizeof(void *);
        mach_pte = phys_to_machine(PFN_PHYS(pt_frame++));

        for(;count <= L1_PAGETABLE_ENTRIES && pfn_to_map <= *max_pfn; count++)
        {
            mmu_updates[count].ptr = mach_pte;
            mmu_updates[count].val =
                pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
            if(count == 1) DEBUG("mach_pte 0x%lx", mach_pte);
            mach_pte += sizeof(void *);
        }
        if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
        {
            printk("ERROR: mmu_update failed\n");
            *(int*)0=0;
        }
        (*start_pfn)++;
    }

    *start_pfn = pt_frame;
}

void init_mm(void)
{

    unsigned long start_pfn, max_pfn;

    printk("MM: Init\n");

    printk("  _text:       %p\n", &_text);
    printk("  _etext:      %p\n", &_etext);
    printk("  _edata:      %p\n", &_edata);
    printk("  stack start: %p\n", &stack);
    printk("  _end:        %p\n", &_end);

    /* set up minimal memory info */
    phys_to_machine_mapping = (unsigned long *)start_info.mfn_list;

    /* First page follows page table pages and 3 more pages (store page etc) */
    start_pfn = PFN_UP(to_phys(start_info.pt_base)) + start_info.nr_pt_frames + 3;
    max_pfn = start_info.nr_pages;

    printk("  start_pfn:   %lx\n", start_pfn);
    printk("  max_pfn:     %lx\n", max_pfn);

#ifdef __i386__
    build_pagetable(&start_pfn, &max_pfn);
#endif

    /*
     * now we can initialise the page allocator
     */
    printk("MM: Initialise page allocator for %lx(%lx)-%lx(%lx)\n",
           (u_long)to_virt(PFN_PHYS(start_pfn)), PFN_PHYS(start_pfn),
           (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn));
    init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn));
    printk("MM: done\n");
}