ia64/xen-unstable

view xen-2.4.16/common/memory.c @ 123:946e29624b77

bitkeeper revision 1.22.2.1 (3e428abeBVF1J00dtGMWKqQjaRgvkA)

mm.h, memory.c:
Fix compiler warning.
author kaf24@labyrinth.cl.cam.ac.uk
date Thu Feb 06 16:18:06 2003 +0000 (2003-02-06)
parents 658b3aeca0e5
children b591e70eec1d
line source

/******************************************************************************
 * memory.c
 *
 * Copyright (c) 2002 K A Fraser
 *
 * A description of the page table API:
 *
 * Domains trap to process_page_updates with a list of update requests.
 * This is a list of (ptr, val) pairs, where the requested operation
 * is *ptr = val.
 *
 * Reference counting of pages:
 * ----------------------------
 * Each page has two refcounts: tot_count and type_count.
 *
 * TOT_COUNT is the obvious reference count. It counts all uses of a
 * physical page frame by a domain, including uses as a page directory,
 * a page table, or simple mappings via a PTE. This count prevents a
 * domain from releasing a frame back to the hypervisor's free pool when
 * it is still referencing it!
 *
 * TYPE_COUNT is more subtle. A frame can be put to one of three
 * mutually-exclusive uses: it might be used as a page directory, or a
 * page table, or it may be mapped writeable by the domain [of course, a
 * frame may not be used in any of these three ways!].
 * So, type_count is a count of the number of times a frame is being
 * referred to in its current incarnation. Therefore, a page can only
 * change its type when its type count is zero.
 *
 * Pinning the page type:
 * ----------------------
 * The type of a page can be pinned/unpinned with the commands
 * PGEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
 * pinning is not reference counted, so it can't be nested).
 * This is useful to prevent a page's type count falling to zero, at which
 * point safety checks would need to be carried out next time the count
 * is increased again.
 *
 * A further note on writeable page mappings:
 * ------------------------------------------
 * For simplicity, the count of writeable mappings for a page may not
 * correspond to reality. The 'writeable count' is incremented for every
 * PTE which maps the page with the _PAGE_RW flag set. However, for
 * write access to be possible the page directory entry must also have
 * its _PAGE_RW bit set. We do not check this as it complicates the
 * reference counting considerably [consider the case of multiple
 * directory entries referencing a single page table, some with the RW
 * bit set, others not -- it starts getting a bit messy].
 * In normal use, this simplification shouldn't be a problem.
 * However, the logic can be added if required.
 *
 * One more note on read-only page mappings:
 * -----------------------------------------
 * We want domains to be able to map pages for read-only access. The
 * main reason is that page tables and directories should be readable
 * by a domain, but it would not be safe for them to be writeable.
 * However, domains have free access to rings 1 & 2 of the Intel
 * privilege model. In terms of page protection, these are considered
 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
 * read-only restrictions are respected in supervisor mode -- if the
 * bit is clear then any mapped page is writeable.
 *
 * We get round this by always setting the WP bit and disallowing
 * updates to it. This is very unlikely to cause a problem for guest
 * OS's, which will generally use the WP bit to simplify copy-on-write
 * implementation (in that case, OS wants a fault when it writes to
 * an application-supplied buffer).
 */

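/*
 * Illustrative sketch (not part of the original file): what a single
 * (ptr, val) request for the interface described above might look like from
 * the guest side. page_update_request_t and PGREQ_NORMAL are taken from the
 * code below; the trap/hypercall used to hand the request to
 * do_process_page_updates() is an assumption and is only hinted at here.
 */
#if 0 /* guest-side example only -- never compiled as part of the hypervisor */
static void example_single_pte_update(unsigned long pte_maddr,
                                      unsigned long new_pte_val)
{
    page_update_request_t req;

    /* Operation type is demuxed from the low bits of 'ptr' (PTEs are
     * 4-byte aligned, so those bits are otherwise zero). */
    req.ptr = pte_maddr | PGREQ_NORMAL;
    req.val = new_pte_val;      /* i.e. request "*pte_maddr = new_pte_val" */

    /* ...submit &req (count = 1) to the hypervisor via the page-update
     * trap; the exact mechanism is not defined in this file... */
}
#endif
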
/*
 * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
 * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.]
 * -----------------------------------------------------------------------
 *
 * *********
 * UPDATE 15/7/02: Interface has changed -- updates now specify physical
 * address of page-table entry, rather than specifying a virtual address,
 * so hypervisor no longer "walks" the page tables. Therefore the
 * solution below cannot work. Another possibility is to add a new entry
 * to our "struct page" which says to which top-level page table each
 * lower-level page table or writeable mapping belongs. If it belongs to more
 * than one, we'd probably just flush on all processors running the domain.
 * *********
 *
 * ** 1 **
 * The problem involves creating new page tables which might be mapped
 * writeable in the TLB of another processor. As an example, a domain might be
 * running in two contexts (ie. on two processors) simultaneously, using the
 * same top-level page table in both contexts. Now, if context 1 sends an
 * update request [make page P read-only, add a reference to page P as a page
 * table], that will succeed if there was only one writeable mapping of P.
 * However, that mapping may persist in the TLB of context 2.
 *
 * Solution: when installing a new page table, we must flush foreign TLBs as
 * necessary. Naive solution is to flush on any processor running our domain.
 * Cleverer solution is to flush on any processor running same top-level page
 * table, but this will sometimes fail (consider two different top-level page
 * tables which have a shared lower-level page table).
 *
 * A better solution: when squashing a write reference, check how many times
 * that lowest-level table entry is referenced by ORing refcounts of tables
 * down the page-table hierarchy. If the result is != 1, we require flushing
 * all instances of current domain if a new table is installed (because the
 * lowest-level entry may be referenced by many top-level page tables).
 * However, common case will be that result == 1, so we only need to flush
 * processors with the same top-level page table. Make choice at
 * table-installation time based on a `flush_level' flag, which is
 * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
 * to FLUSH_NONE, while squashed write mappings can only promote up
 * to more aggressive flush types.
 *
 * ** 2 **
 * Same problem occurs when removing a page table, at level 1 say, then
 * making it writeable. Need a TLB flush in between, otherwise another
 * processor might write an illegal mapping into the old table, while yet
 * another processor can use the illegal mapping because of a stale level-2
 * TLB entry. So, removal of a table reference sets 'flush_level'
 * appropriately, and a flush occurs on next addition of a fresh write mapping.
 *
 * BETTER SOLUTION FOR BOTH 1 AND 2:
 * When type_refcnt goes to zero, leave old type in place (don't set to
 * PGT_none). Then, only flush if making a page table of a page with
 * (cnt=0, type=PGT_writeable), or when adding a write mapping for a page
 * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages
 * with refcnt==0 to be reset to PGT_none. Need an array for the purpose,
 * added to when a type_refcnt goes to zero, and emptied on a TLB flush.
 * Either have per-domain table, or force TLB flush at end of each
 * call to 'process_page_updates'.
 * Most OSes will always keep a writeable reference hanging around, and
 * page table structure is fairly static, so this mechanism should be
 * fairly cheap.
 *
 * MAYBE EVEN BETTER? [somewhat dubious: not for first cut of the code]:
 * If we need to force an intermediate flush, those other processors
 * spin until we complete, then do a single TLB flush. They can spin on
 * the lock protecting 'process_page_updates', and continue when that
 * is freed. Saves cost of setting up and servicing an IPI: later
 * communication is synchronous. Processors trying to install the domain
 * or domain&pagetable would also enter the spin.
 *
 * ** 3 **
 * Indeed, this problem generalises to reusing page tables at different
 * levels of the hierarchy (conceptually, the guest OS can use the
 * hypervisor to introduce illegal table entries by proxy). Consider
 * unlinking a level-1 page table and reintroducing at level 2 with no
 * TLB flush. Hypervisor can add a reference to some other level-1 table
 * with the RW bit set. This is fine in the level-2 context, but some
 * other processor may still be using that table in level-1 context
 * (due to a stale TLB entry). At level 1 it may look like the
 * processor has write access to the other level-1 page table! Therefore
 * can add illegal values there with impunity :-(
 *
 * Fortunately, the solution above generalises to this extended problem.
 */

/*
 * UPDATE 12.11.02: We no longer have struct page and mem_map. These
 * have been replaced by struct pfn_info and frame_table respectively.
 *
 * system_free_list is a list_head linking all system-owned free pages.
 * It is initialized in init_frametable.
 *
 * Boris Dragovic.
 */

#include <xeno/config.h>
#include <xeno/init.h>
#include <xeno/lib.h>
#include <xeno/mm.h>
#include <xeno/sched.h>
#include <xeno/errno.h>
#include <asm/page.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/domain_page.h>

#if 1
#define MEM_LOG(_f, _a...) \
    printk("DOM%d: (file=memory.c, line=%d) " _f "\n", \
           current->domain, __LINE__, ## _a)
#else
#define MEM_LOG(_f, _a...) ((void)0)
#endif

/* Domain 0 is allowed to submit requests on behalf of others. */
#define DOMAIN_OKAY(_f) \
    ((((_f) & PG_domain_mask) == current->domain) || (current->domain == 0))

/* 'get' checks parameter for validity before inc'ing refcnt. */
static int get_l2_table(unsigned long page_nr);
static int get_l1_table(unsigned long page_nr);
static int get_page(unsigned long page_nr, int writeable);
static int inc_page_refcnt(unsigned long page_nr, unsigned int type);
/* 'put' does no checking because if refcnt not zero, entity must be valid. */
static void put_l2_table(unsigned long page_nr);
static void put_l1_table(unsigned long page_nr);
static void put_page(unsigned long page_nr, int writeable);
static int dec_page_refcnt(unsigned long page_nr, unsigned int type);

static int mod_l2_entry(unsigned long, l2_pgentry_t);
static int mod_l1_entry(unsigned long, l1_pgentry_t);

/* frame table size and its size in pages */
frame_table_t * frame_table;
unsigned long frame_table_size;
unsigned long max_page;

struct list_head free_list;
unsigned int free_pfns;

static int tlb_flush[NR_CPUS];

/*
 * init_frametable:
 * Initialise per-frame memory information. This goes directly after
 * MAX_MONITOR_ADDRESS in physical memory.
 */
void __init init_frametable(unsigned long nr_pages)
{
    struct pfn_info *pf;
    unsigned long page_index;

    memset(tlb_flush, 0, sizeof(tlb_flush));

    max_page = nr_pages;
    frame_table_size = nr_pages * sizeof(struct pfn_info);
    frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
    frame_table = (frame_table_t *)FRAMETABLE_VIRT_START;
    memset(frame_table, 0, frame_table_size);

    free_pfns = 0;

    /* Put all domain-allocatable memory on a free list. */
    INIT_LIST_HEAD(&free_list);
    for ( page_index = nr_pages - 1;
          page_index >= (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT;
          page_index-- )
    {
        pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
        list_add_tail(&pf->list, &free_list);
        free_pfns++;
    }
}

/* Return original refcnt, or -1 on error. */
static int inc_page_refcnt(unsigned long page_nr, unsigned int type)
{
    struct pfn_info *page;
    unsigned long flags;

    if ( page_nr >= max_page )
    {
        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
        return -1;
    }
    page = frame_table + page_nr;
    flags = page->flags;
    if ( !DOMAIN_OKAY(flags) )
    {
        MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
        return -1;
    }
    if ( (flags & PG_type_mask) != type )
    {
        if ( page_type_count(page) != 0 )
        {
            MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld",
                    page_nr << PAGE_SHIFT,
                    flags & PG_type_mask, type, page_type_count(page));
            return -1;
        }

        page->flags |= type;
    }

    get_page_tot(page);
    return get_page_type(page);
}

/* Return new refcnt, or -1 on error. */
static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
{
    struct pfn_info *page;
    int ret;

    if ( page_nr >= max_page )
    {
        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
        return -1;
    }
    page = frame_table + page_nr;
    if ( !DOMAIN_OKAY(page->flags) ||
         ((page->flags & PG_type_mask) != type) )
    {
        MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)",
                page->flags & PG_domain_mask, page->flags & PG_type_mask,
                type);
        return -1;
    }
    ASSERT(page_type_count(page) != 0);
    if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask;
    put_page_tot(page);
    return ret;
}

/* We allow an L2 table to map itself, to achieve a linear pagetable. */
/* NB. There's no need for a put_twisted_l2_table() function!! */
static int get_twisted_l2_table(unsigned long entry_pfn, l2_pgentry_t l2e)
{
    unsigned long l2v = l2_pgentry_val(l2e);

    /* Clearly the mapping must be read-only :-) */
    if ( (l2v & _PAGE_RW) )
    {
        MEM_LOG("Attempt to install twisted L2 entry with write permissions");
        return -1;
    }

    /* This is a sufficient final check. */
    if ( (l2v >> PAGE_SHIFT) != entry_pfn )
    {
        MEM_LOG("L2 tables may not map _other_ L2 tables!");
        return -1;
    }

    /* We don't bump the reference counts. */
    return 0;
}

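/*
 * Illustrative sketch (not part of the original file): the request a guest
 * might submit to set up the self-referencing ("twisted") L2 entry accepted
 * above, giving it a linear view of its own page tables. LINEAR_PT_SLOT and
 * the submission step are assumptions for the example; the entry must be
 * read-only or get_twisted_l2_table() will reject it.
 */
#if 0 /* guest-side example only */
static void example_install_linear_pagetable(unsigned long l2_pfn)
{
    page_update_request_t req;

    /* Machine address of the chosen guest slot inside the L2 page itself
     * (it must lie below DOMAIN_ENTRIES_PER_L2_PAGETABLE, or mod_l2_entry()
     * refuses the update). */
    req.ptr = ((l2_pfn << PAGE_SHIFT) +
               (LINEAR_PT_SLOT * sizeof(l2_pgentry_t))) | PGREQ_NORMAL;

    /* Point the slot back at the L2 page: present, but *not* _PAGE_RW. */
    req.val = (l2_pfn << PAGE_SHIFT) | _PAGE_PRESENT;

    /* ...submit to do_process_page_updates() as usual... */
}
#endif
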
static int get_l2_table(unsigned long page_nr)
{
    l2_pgentry_t *p_l2_entry, l2_entry;
    int i, ret=0;

    ret = inc_page_refcnt(page_nr, PGT_l2_page_table);
    if ( ret != 0 ) return (ret < 0) ? ret : 0;

    /* NEW level-2 page table! Deal with every PDE in the table. */
    p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        l2_entry = *p_l2_entry++;
        if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue;
        if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
        {
            MEM_LOG("Bad L2 page type settings %04lx",
                    l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE));
            ret = -1;
            goto out;
        }
        /* Assume we're mapping an L1 table, falling back to twisted L2. */
        ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry));
        if ( ret ) ret = get_twisted_l2_table(page_nr, l2_entry);
        if ( ret ) goto out;
    }

    /* Now we simply slap in our high mapping. */
    memcpy(p_l2_entry,
           idle_pg_table[smp_processor_id()] + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    p_l2_entry[(PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT) -
               DOMAIN_ENTRIES_PER_L2_PAGETABLE] =
        mk_l2_pgentry(__pa(current->mm.perdomain_pt) | __PAGE_HYPERVISOR);

 out:
    unmap_domain_mem(p_l2_entry);
    return ret;
}

static int get_l1_table(unsigned long page_nr)
{
    l1_pgentry_t *p_l1_entry, l1_entry;
    int i, ret;

    /* Update ref count for page pointed at by PDE. */
    ret = inc_page_refcnt(page_nr, PGT_l1_page_table);
    if ( ret != 0 ) return (ret < 0) ? ret : 0;

    /* NEW level-1 page table! Deal with every PTE in the table. */
    p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
    {
        l1_entry = *p_l1_entry++;
        if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue;
        if ( (l1_pgentry_val(l1_entry) &
              (_PAGE_GLOBAL|_PAGE_PAT)) )
        {
            MEM_LOG("Bad L1 page type settings %04lx",
                    l1_pgentry_val(l1_entry) &
                    (_PAGE_GLOBAL|_PAGE_PAT));
            ret = -1;
            goto out;
        }
        ret = get_page(l1_pgentry_to_pagenr(l1_entry),
                       l1_pgentry_val(l1_entry) & _PAGE_RW);
        if ( ret ) goto out;
    }

 out:
    /* Make sure we unmap the right page! */
    unmap_domain_mem(p_l1_entry-1);
    return ret;
}

static int get_page(unsigned long page_nr, int writeable)
{
    struct pfn_info *page;
    unsigned long flags;

    /* Update ref count for page pointed at by PTE. */
    if ( page_nr >= max_page )
    {
        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
        return(-1);
    }
    page = frame_table + page_nr;
    flags = page->flags;
    if ( !DOMAIN_OKAY(flags) )
    {
        MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
        return(-1);
    }

    if ( writeable )
    {
        if ( (flags & PG_type_mask) != PGT_writeable_page )
        {
            if ( page_type_count(page) != 0 )
            {
                MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld",
                        flags & PG_type_mask, PGT_writeable_page,
                        page_type_count(page));
                return(-1);
            }
            page->flags |= PGT_writeable_page;
        }
        get_page_type(page);
    }

    get_page_tot(page);

    return(0);
}

static void put_l2_table(unsigned long page_nr)
{
    l2_pgentry_t *p_l2_entry, l2_entry;
    int i;

    if ( dec_page_refcnt(page_nr, PGT_l2_page_table) ) return;

    /* We had last reference to level-2 page table. Free the PDEs. */
    p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        l2_entry = *p_l2_entry++;
        if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) )
            put_l1_table(l2_pgentry_to_pagenr(l2_entry));
    }

    unmap_domain_mem(p_l2_entry);
}

static void put_l1_table(unsigned long page_nr)
{
    l1_pgentry_t *p_l1_entry, l1_entry;
    int i;

    if ( dec_page_refcnt(page_nr, PGT_l1_page_table) ) return;

    /* We had last reference to level-1 page table. Free the PTEs. */
    p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
    {
        l1_entry = *p_l1_entry++;
        if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) )
        {
            put_page(l1_pgentry_to_pagenr(l1_entry),
                     l1_pgentry_val(l1_entry) & _PAGE_RW);
        }
    }

    /* Make sure we unmap the right page! */
    unmap_domain_mem(p_l1_entry-1);
}

static void put_page(unsigned long page_nr, int writeable)
{
    struct pfn_info *page;
    ASSERT(page_nr < max_page);
    page = frame_table + page_nr;
    ASSERT(DOMAIN_OKAY(page->flags));
    ASSERT((!writeable) ||
           ((page_type_count(page) != 0) &&
            ((page->flags & PG_type_mask) == PGT_writeable_page)));
    if ( writeable && (put_page_type(page) == 0) )
    {
        tlb_flush[smp_processor_id()] = 1;
        page->flags &= ~PG_type_mask;
    }
    put_page_tot(page);
}

static int mod_l2_entry(unsigned long pa, l2_pgentry_t new_l2_entry)
{
    l2_pgentry_t *p_l2_entry, old_l2_entry;

    p_l2_entry = map_domain_mem(pa);
    old_l2_entry = *p_l2_entry;

    if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >=
         DOMAIN_ENTRIES_PER_L2_PAGETABLE )
    {
        MEM_LOG("Illegal L2 update attempt in hypervisor area %p",
                p_l2_entry);
        goto fail;
    }

    if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) )
    {
        if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
        {
            MEM_LOG("Bad L2 entry val %04lx",
                    l2_pgentry_val(new_l2_entry) &
                    (_PAGE_GLOBAL|_PAGE_PSE));
            goto fail;
        }
        /* Differ in mapping (bits 12-31) or presence (bit 0)? */
        if ( ((l2_pgentry_val(old_l2_entry) ^
               l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
        {
            if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
            {
                put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
            }

            /* Assume we're mapping an L1 table, falling back to twisted L2. */
            if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) &&
                 get_twisted_l2_table(pa >> PAGE_SHIFT, new_l2_entry) )
                goto fail;
        }
    }
    else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
    {
        put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
    }

    *p_l2_entry = new_l2_entry;
    unmap_domain_mem(p_l2_entry);
    return 0;

 fail:
    unmap_domain_mem(p_l2_entry);
    return -1;
}

static int mod_l1_entry(unsigned long pa, l1_pgentry_t new_l1_entry)
{
    l1_pgentry_t *p_l1_entry, old_l1_entry;

    p_l1_entry = map_domain_mem(pa);
    old_l1_entry = *p_l1_entry;

    if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) )
    {
        if ( (l1_pgentry_val(new_l1_entry) &
              (_PAGE_GLOBAL|_PAGE_PAT)) )
        {
            MEM_LOG("Bad L1 entry val %04lx",
                    l1_pgentry_val(new_l1_entry) &
                    (_PAGE_GLOBAL|_PAGE_PAT));
            goto fail;
        }
        /*
         * Differ in mapping (bits 12-31), writeable (bit 1), or
         * presence (bit 0)?
         */
        if ( ((l1_pgentry_val(old_l1_entry) ^
               l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
        {
            if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
            {
                put_page(l1_pgentry_to_pagenr(old_l1_entry),
                         l1_pgentry_val(old_l1_entry) & _PAGE_RW);
            }

            if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) )
            {
                goto fail;
            }
        }
    }
    else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
    {
        put_page(l1_pgentry_to_pagenr(old_l1_entry),
                 l1_pgentry_val(old_l1_entry) & _PAGE_RW);
    }

    *p_l1_entry = new_l1_entry;
    unmap_domain_mem(p_l1_entry);
    return 0;

 fail:
    unmap_domain_mem(p_l1_entry);
    return -1;
}

static int do_extended_command(unsigned long ptr, unsigned long val)
{
    int err = 0;
    unsigned long pfn = ptr >> PAGE_SHIFT;
    struct pfn_info *page = frame_table + pfn;

    switch ( (val & PGEXT_CMD_MASK) )
    {
    case PGEXT_PIN_L1_TABLE:
        err = get_l1_table(pfn);
        goto mark_as_pinned;
    case PGEXT_PIN_L2_TABLE:
        err = get_l2_table(pfn);
    mark_as_pinned:
        if ( err )
        {
            MEM_LOG("Error while pinning pfn %08lx", pfn);
            break;
        }
        put_page_type(page);
        put_page_tot(page);
        if ( !(page->type_count & REFCNT_PIN_BIT) )
        {
            page->type_count |= REFCNT_PIN_BIT;
            page->tot_count |= REFCNT_PIN_BIT;
        }
        else
        {
            MEM_LOG("Pfn %08lx already pinned", pfn);
            err = 1;
        }
        break;

    case PGEXT_UNPIN_TABLE:
        if ( !DOMAIN_OKAY(page->flags) )
        {
            err = 1;
            MEM_LOG("Page %08lx bad domain (dom=%ld)",
                    ptr, page->flags & PG_domain_mask);
        }
        else if ( (page->type_count & REFCNT_PIN_BIT) )
        {
            page->type_count &= ~REFCNT_PIN_BIT;
            page->tot_count &= ~REFCNT_PIN_BIT;
            get_page_type(page);
            get_page_tot(page);
            ((page->flags & PG_type_mask) == PGT_l1_page_table) ?
                put_l1_table(pfn) : put_l2_table(pfn);
        }
        else
        {
            err = 1;
            MEM_LOG("Pfn %08lx not pinned", pfn);
        }
        break;

    case PGEXT_NEW_BASEPTR:
        err = get_l2_table(pfn);
        if ( !err )
        {
            put_l2_table(pagetable_val(current->mm.pagetable) >> PAGE_SHIFT);
            current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
        }
        else
        {
            MEM_LOG("Error while installing new baseptr %08lx %d", ptr, err);
        }
        /* fall through */

    case PGEXT_TLB_FLUSH:
        tlb_flush[smp_processor_id()] = 1;
        break;

    case PGEXT_INVLPG:
        __flush_tlb_one(val & ~PGEXT_CMD_MASK);
        break;

    default:
        MEM_LOG("Invalid extended pt command 0x%08lx", val & PGEXT_CMD_MASK);
        err = 1;
        break;
    }

    return err;
}

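/*
 * Illustrative sketch (not part of the original file): the encodings a guest
 * might use for the pin/unpin and baseptr-switch commands handled above --
 * frame address in 'ptr', command in the low bits of 'val'. The submission
 * step is assumed and not shown.
 */
#if 0 /* guest-side example only */
static void example_extended_commands(unsigned long new_l2_pfn)
{
    page_update_request_t req;

    /* Pin the new top-level table so its type count cannot fall to zero
     * (see "Pinning the page type" in the header comment). */
    req.ptr = (new_l2_pfn << PAGE_SHIFT) | PGREQ_EXTENDED_COMMAND;
    req.val = PGEXT_PIN_L2_TABLE;
    /* ...submit... */

    /* Switch to it: the hypervisor takes a reference on the new table,
     * drops the old one, and falls through to request a local TLB flush. */
    req.ptr = (new_l2_pfn << PAGE_SHIFT) | PGREQ_EXTENDED_COMMAND;
    req.val = PGEXT_NEW_BASEPTR;
    /* ...submit... */

    /* Later, drop the pin again. */
    req.ptr = (new_l2_pfn << PAGE_SHIFT) | PGREQ_EXTENDED_COMMAND;
    req.val = PGEXT_UNPIN_TABLE;
    /* ...submit... */
}
#endif
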
int do_process_page_updates(page_update_request_t *ureqs, int count)
{
    page_update_request_t req;
    unsigned long flags, pfn;
    struct pfn_info *page;
    int err = 0, i;

    for ( i = 0; i < count; i++ )
    {
        if ( copy_from_user(&req, ureqs, sizeof(req)) )
        {
            kill_domain_with_errmsg("Cannot read page update request");
        }

        pfn = req.ptr >> PAGE_SHIFT;
        if ( pfn >= max_page )
        {
            MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
            kill_domain_with_errmsg("Page update request out of range");
        }

        err = 1;

        /* Least significant bits of 'ptr' demux the operation type. */
        switch ( req.ptr & (sizeof(l1_pgentry_t)-1) )
        {
            /*
             * PGREQ_NORMAL: Normal update to any level of page table.
             */
        case PGREQ_NORMAL:
            page = frame_table + pfn;
            flags = page->flags;

            if ( DOMAIN_OKAY(flags) )
            {
                switch ( (flags & PG_type_mask) )
                {
                case PGT_l1_page_table:
                    err = mod_l1_entry(req.ptr, mk_l1_pgentry(req.val));
                    break;
                case PGT_l2_page_table:
                    err = mod_l2_entry(req.ptr, mk_l2_pgentry(req.val));
                    break;
                default:
                    MEM_LOG("Update to non-pt page %08lx", req.ptr);
                    break;
                }
            }
            else
            {
                MEM_LOG("Bad domain normal update (dom %d, pfn %ld)",
                        current->domain, pfn);
            }
            break;

        case PGREQ_MPT_UPDATE:
            page = frame_table + pfn;
            if ( DOMAIN_OKAY(page->flags) )
            {
                machine_to_phys_mapping[pfn] = req.val;
                err = 0;
            }
            else
            {
                MEM_LOG("Bad domain MPT update (dom %d, pfn %ld)",
                        current->domain, pfn);
            }
            break;

            /*
             * PGREQ_EXTENDED_COMMAND: Extended command is specified
             * in the least-significant bits of the 'value' field.
             */
        case PGREQ_EXTENDED_COMMAND:
            req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
            err = do_extended_command(req.ptr, req.val);
            break;

        case PGREQ_UNCHECKED_UPDATE:
            req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
            if ( current->domain == 0 )
            {
                unsigned long *ptr = map_domain_mem(req.ptr);
                *ptr = req.val;
                unmap_domain_mem(ptr);
                err = 0;
            }
            else
            {
                MEM_LOG("Bad unchecked update attempt");
            }
            break;

        default:
            MEM_LOG("Invalid page update command %08lx", req.ptr);
            break;
        }

        if ( err )
        {
            kill_domain_with_errmsg("Illegal page update request");
        }

        ureqs++;
    }

    if ( tlb_flush[smp_processor_id()] )
    {
        tlb_flush[smp_processor_id()] = 0;
        __asm__ __volatile__ (
            "movl %%eax,%%cr3" : :
            "a" (pagetable_val(current->mm.pagetable)));
    }

    return(0);
}
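
/*
 * Illustrative sketch (not part of the original file): batching several
 * requests so that they are validated and applied in one trap to
 * do_process_page_updates(). The PGREQ_MPT_UPDATE and PGEXT_INVLPG encodings
 * follow the demux above; the guest-side submission call is an assumption.
 */
#if 0 /* guest-side example only */
static void example_batched_updates(unsigned long pte_maddr,
                                    unsigned long new_pte_val,
                                    unsigned long vaddr,
                                    unsigned long mfn, unsigned long pfn)
{
    page_update_request_t req[3];

    /* 1. Ordinary PTE write, checked against the page-type rules above. */
    req[0].ptr = pte_maddr | PGREQ_NORMAL;
    req[0].val = new_pte_val;

    /* 2. Keep the machine-to-physical table in step with the new mapping. */
    req[1].ptr = (mfn << PAGE_SHIFT) | PGREQ_MPT_UPDATE;
    req[1].val = pfn;

    /* 3. Invalidate the stale translation for the affected virtual address
     *    (the virtual address rides in the upper bits of 'val'). */
    req[2].ptr = PGREQ_EXTENDED_COMMAND;
    req[2].val = (vaddr & PAGE_MASK) | PGEXT_INVLPG;

    /* ...submit req[0..2] in a single trap; the mechanism is not shown... */
}
#endif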