ia64/xen-unstable

view xen-2.4.16/common/memory.c @ 86:4a10fe9b20ec

bitkeeper revision 1.15 (3e24a984iRiWWcgfKCxu2p5q3YbxXw)

Many files:
First half of support for per-domain GDTs and LDTs
author kaf24@labyrinth.cl.cam.ac.uk
date Wed Jan 15 00:21:24 2003 +0000 (2003-01-15)
parents c3e6a52cd801
children 336647fd8f40 f7ff141acc2a a8063692097a

/******************************************************************************
 * memory.c
 *
 * Copyright (c) 2002 K A Fraser
 *
 * A description of the page table API:
 *
 * Domains trap to process_page_updates with a list of update requests.
 * This is a list of (ptr, val) pairs, where the requested operation
 * is *ptr = val. (An illustrative example of such a list follows this
 * comment.)
 *
 * Reference counting of pages:
 * ----------------------------
 * Each page has two refcounts: tot_count and type_count.
 *
 * TOT_COUNT is the obvious reference count. It counts all uses of a
 * physical page frame by a domain, including uses as a page directory,
 * a page table, or simple mappings via a PTE. This count prevents a
 * domain from releasing a frame back to the hypervisor's free pool when
 * it is still referencing it!
 *
 * TYPE_COUNT is more subtle. A frame can be put to one of three
 * mutually-exclusive uses: it might be used as a page directory, or a
 * page table, or it may be mapped writeable by the domain [of course, a
 * frame might not be used in any of these three ways at all].
 * So, type_count is a count of the number of times a frame is being
 * referred to in its current incarnation. Therefore, a page can only
 * change its type when its type count is zero.
 *
 * Pinning the page type:
 * ----------------------
 * The type of a page can be pinned/unpinned with the commands
 * PGEXT_PIN_L?_TABLE and PGEXT_UNPIN_TABLE. Each page can be pinned
 * exactly once (that is, pinning is not reference counted, so it can't
 * be nested).
 * This is useful to prevent a page's type count falling to zero, at which
 * point safety checks would need to be carried out next time the count
 * is increased again.
 *
 * A further note on writeable page mappings:
 * ------------------------------------------
 * For simplicity, the count of writeable mappings for a page may not
 * correspond to reality. The 'writeable count' is incremented for every
 * PTE which maps the page with the _PAGE_RW flag set. However, for
 * write access to be possible the page directory entry must also have
 * its _PAGE_RW bit set. We do not check this, as it complicates the
 * reference counting considerably [consider the case of multiple
 * directory entries referencing a single page table, some with the RW
 * bit set, others not -- it starts getting a bit messy].
 * In normal use, this simplification shouldn't be a problem.
 * However, the logic can be added if required.
 *
 * One more note on read-only page mappings:
 * -----------------------------------------
 * We want domains to be able to map pages for read-only access. The
 * main reason is that page tables and directories should be readable
 * by a domain, but it would not be safe for them to be writeable.
 * However, domains have free access to rings 1 & 2 of the Intel
 * privilege model. In terms of page protection, these are considered
 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
 * read-only restrictions are respected in supervisor mode -- if the
 * bit is clear then any mapped page is writeable.
 *
 * We get around this by always setting the WP bit and disallowing
 * updates to it. This is very unlikely to cause a problem for guest
 * OSes, which will generally use the WP bit to simplify copy-on-write
 * implementation (in that case, the OS wants a fault when it writes to
 * an application-supplied buffer).
 */
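
/*
 * Illustrative sketch only (not part of this file's interface): how a guest
 * might encode a small batch of update requests for
 * do_process_page_updates(). The example function name and the way the guest
 * actually traps into the hypervisor are hypothetical; only the (ptr, val)
 * encoding shown here is taken from the code below.
 */
#if 0
static void example_build_update_batch(unsigned long pte_phys,
                                       unsigned long new_pte,
                                       unsigned long l1_frame_phys)
{
    page_update_request_t req[2];

    /* Normal update: write 'new_pte' into the PTE at physical address
       'pte_phys'. The operation type lives in the low bits of 'ptr'. */
    req[0].ptr = pte_phys | PGREQ_NORMAL;
    req[0].val = new_pte;

    /* Extended command: pin the frame at 'l1_frame_phys' as an L1 table.
       The command code is carried in 'val' (within PGEXT_CMD_MASK). */
    req[1].ptr = l1_frame_phys | PGREQ_EXTENDED_COMMAND;
    req[1].val = PGEXT_PIN_L1_TABLE;

    /* The guest would now trap to the hypervisor, which eventually calls
       do_process_page_updates(req, 2) on its behalf. */
}
#endif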

/*
 * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
 * [THAT IS, THEY'RE NOT A PROBLEM NOW, AND MAY NOT EVER BE.]
 * -----------------------------------------------------------------------
 *
 * *********
 * UPDATE 15/7/02: Interface has changed -- updates now specify the physical
 * address of the page-table entry, rather than specifying a virtual address,
 * so the hypervisor no longer "walks" the page tables. Therefore the
 * solution below cannot work. Another possibility is to add a new entry
 * to our "struct page" which says to which top-level page table each
 * lower-level page table or writeable mapping belongs. If it belongs to more
 * than one, we'd probably just flush on all processors running the domain.
 * *********
 *
 * ** 1 **
 * The problem involves creating new page tables which might be mapped
 * writeable in the TLB of another processor. As an example, a domain might be
 * running in two contexts (i.e. on two processors) simultaneously, using the
 * same top-level page table in both contexts. Now, if context 1 sends an
 * update request [make page P read-only, add a reference to page P as a page
 * table], that will succeed if there was only one writeable mapping of P.
 * However, that mapping may persist in the TLB of context 2.
 *
 * Solution: when installing a new page table, we must flush foreign TLBs as
 * necessary. The naive solution is to flush on any processor running our
 * domain. A cleverer solution is to flush on any processor running the same
 * top-level page table, but this will sometimes fail (consider two different
 * top-level page tables which have a shared lower-level page table).
 *
 * A better solution: when squashing a write reference, check how many times
 * that lowest-level table entry is referenced by ORing refcounts of tables
 * down the page-table hierarchy. If the result is != 1, we require flushing
 * all instances of the current domain if a new table is installed (because
 * the lowest-level entry may be referenced by many top-level page tables).
 * However, the common case will be that the result == 1, so we only need to
 * flush processors with the same top-level page table. Make the choice at
 * table-installation time based on a `flush_level' flag, which is one of
 * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
 * to FLUSH_NONE, while squashed write mappings can only promote it up
 * to more aggressive flush types.
 *
 * ** 2 **
 * The same problem occurs when removing a page table, at level 1 say, and
 * then making it writeable. A TLB flush is needed in between, otherwise
 * another processor might write an illegal mapping into the old table, while
 * yet another processor can use the illegal mapping because of a stale
 * level-2 TLB entry. So, removal of a table reference sets 'flush_level'
 * appropriately, and a flush occurs on the next addition of a fresh write
 * mapping.
 *
 * BETTER SOLUTION FOR BOTH 1 AND 2:
 * When type_refcnt goes to zero, leave the old type in place (don't set it to
 * PGT_none). Then, only flush if making a page table of a page with
 * (cnt=0, type=PGT_writeable), or when adding a write mapping for a page
 * with (cnt=0, type=PGT_pagexxx). A TLB flush will cause all pages
 * with refcnt==0 to be reset to PGT_none. We need an array for the purpose,
 * added to when a type_refcnt goes to zero, and emptied on a TLB flush.
 * Either have a per-domain table, or force a TLB flush at the end of each
 * call to 'process_page_updates'. (A sketch of this bookkeeping follows
 * this comment block.)
 * Most OSes will always keep a writeable reference hanging around, and
 * the page-table structure is fairly static, so this mechanism should be
 * fairly cheap.
 *
 * MAYBE EVEN BETTER? [somewhat dubious: not for the first cut of the code]:
 * If we need to force an intermediate flush, those other processors
 * spin until we complete, then do a single TLB flush. They can spin on
 * the lock protecting 'process_page_updates', and continue when that
 * is freed. This saves the cost of setting up and servicing an IPI: later
 * communication is synchronous. Processors trying to install the domain
 * or domain&pagetable would also enter the spin.
 *
 * ** 3 **
 * Indeed, this problem generalises to reusing page tables at different
 * levels of the hierarchy (conceptually, the guest OS can use the
 * hypervisor to introduce illegal table entries by proxy). Consider
 * unlinking a level-1 page table and reintroducing it at level 2 with no
 * TLB flush. The hypervisor can add a reference to some other level-1 table
 * with the RW bit set. This is fine in the level-2 context, but some
 * other processor may still be using that table in a level-1 context
 * (due to a stale TLB entry). At level 1 it may look like the
 * processor has write access to the other level-1 page table! It can
 * therefore add illegal values there with impunity :-(
 *
 * Fortunately, the solution above generalises to this extended problem.
 */
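
/*
 * Illustrative sketch only: one possible shape of the deferred-flush
 * bookkeeping proposed above (frames whose type_count has dropped to zero
 * keep their old type until the next TLB flush resets them to PGT_none).
 * None of this exists in the current code; all names below are invented.
 */
#if 0
#define MAX_STALE_FRAMES 64                       /* hypothetical bound     */
static unsigned long stale_frames[MAX_STALE_FRAMES];
static int nr_stale_frames;

static void note_stale_type(unsigned long page_nr)
{
    /* Called when a frame's type_count reaches zero but its old type is
       deliberately left in place. */
    if ( nr_stale_frames < MAX_STALE_FRAMES )
        stale_frames[nr_stale_frames++] = page_nr;
}

static void reset_stale_types_on_flush(void)
{
    /* Called after a TLB flush: it is now safe to forget the old types. */
    while ( nr_stale_frames != 0 )
        frame_table[stale_frames[--nr_stale_frames]].flags &= ~PG_type_mask;
}
#endif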

/*
 * UPDATE 12.11.02: We no longer have struct page and mem_map. These
 * have been replaced by struct pfn_info and frame_table respectively.
 *
 * system_free_list is a list_head linking all system-owned free pages.
 * It is initialized in init_frametable.
 *
 * Boris Dragovic.
 */

#include <xeno/config.h>
#include <xeno/init.h>
#include <xeno/lib.h>
#include <xeno/mm.h>
#include <xeno/sched.h>
#include <xeno/errno.h>
#include <asm/page.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/domain_page.h>

#if 0
#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
#else
#define MEM_LOG(_f, _a...) ((void)0)
#endif

/* 'get' checks parameter for validity before inc'ing refcnt. */
static int get_l2_table(unsigned long page_nr);
static int get_l1_table(unsigned long page_nr);
static int get_page(unsigned long page_nr, int writeable);
static int inc_page_refcnt(unsigned long page_nr, unsigned int type);
/* 'put' does no checking because if refcnt not zero, entity must be valid. */
static int put_l2_table(unsigned long page_nr);
static void put_l1_table(unsigned long page_nr);
static void put_page(unsigned long page_nr, int writeable);
static int dec_page_refcnt(unsigned long page_nr, unsigned int type);

static int mod_l2_entry(unsigned long, l2_pgentry_t);
static int mod_l1_entry(unsigned long, l1_pgentry_t);

/* The frame table, its size in bytes, and the total number of page frames. */
frame_table_t * frame_table;
unsigned long frame_table_size;
unsigned long max_page;

struct list_head free_list;
unsigned int free_pfns;

static int tlb_flush[NR_CPUS];

/*
 * init_frametable:
 * Initialise per-frame memory information. This goes directly after
 * MAX_MONITOR_ADDRESS in physical memory.
 */
void __init init_frametable(unsigned long nr_pages)
{
    struct pfn_info *pf;
    unsigned long page_index;

    memset(tlb_flush, 0, sizeof(tlb_flush));

    max_page = nr_pages;
    frame_table_size = nr_pages * sizeof(struct pfn_info);
    frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
    free_pfns = nr_pages -
        ((MAX_MONITOR_ADDRESS + frame_table_size) >> PAGE_SHIFT);

    frame_table = phys_to_virt(MAX_MONITOR_ADDRESS);
    memset(frame_table, 0, frame_table_size);

    /* Put all domain-allocatable memory on a free list. */
    INIT_LIST_HEAD(&free_list);
    for( page_index = (MAX_MONITOR_ADDRESS + frame_table_size) >> PAGE_SHIFT;
         page_index < nr_pages;
         page_index++ )
    {
        pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
        list_add_tail(&pf->list, &free_list);
    }
}
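
/*
 * Resulting physical memory layout (illustrative summary of the code above):
 *   [0, MAX_MONITOR_ADDRESS)                      : hypervisor/monitor
 *   [MAX_MONITOR_ADDRESS,
 *    MAX_MONITOR_ADDRESS + frame_table_size)      : the frame_table itself
 *   [MAX_MONITOR_ADDRESS + frame_table_size,
 *    nr_pages << PAGE_SHIFT)                      : domain-allocatable pages,
 *                                                   linked on free_list
 */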

/* Return original refcnt, or -1 on error. */
static int inc_page_refcnt(unsigned long page_nr, unsigned int type)
{
    struct pfn_info *page;
    unsigned long flags;

    if ( page_nr >= max_page )
    {
        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
        return(-1);
    }
    page = frame_table + page_nr;
    flags = page->flags;
    if ( (flags & PG_domain_mask) != current->domain )
    {
        MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
        return(-1);
    }
    if ( (flags & PG_type_mask) != type )
    {
        if ( page_type_count(page) != 0 )
        {
            MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld",
                    page_nr << PAGE_SHIFT,
                    flags & PG_type_mask, type, page_type_count(page));
            return(-1);
        }
        page->flags |= type;
    }

    get_page_tot(page);
    return(get_page_type(page));
}

/* Return new refcnt, or -1 on error. */
static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
{
    struct pfn_info *page;
    int ret;

    if ( page_nr >= max_page )
    {
        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
        return(-1);
    }
    page = frame_table + page_nr;
    if ( (page->flags & (PG_type_mask | PG_domain_mask)) !=
         (type | current->domain) )
    {
        MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)",
                page->flags & PG_domain_mask, page->flags & PG_type_mask,
                type);
        return(-1);
    }
    ASSERT(page_type_count(page) != 0);
    if ( (ret = put_page_type(page)) == 0 ) page->flags &= ~PG_type_mask;
    put_page_tot(page);
    return(ret);
}
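
/*
 * Worked example of the rule inc_page_refcnt() enforces (illustrative):
 * a frame currently of type PGT_writeable_page with type_count == 1 cannot
 * be claimed as a page table -- inc_page_refcnt(pfn, PGT_l1_page_table)
 * fails until the writeable mapping has been dropped and type_count is back
 * to zero, at which point the frame's type may change.
 */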

static int get_l2_table(unsigned long page_nr)
{
    l2_pgentry_t *p_l2_entry, l2_entry;
    int i, ret=0;

    ret = inc_page_refcnt(page_nr, PGT_l2_page_table);
    if ( ret != 0 ) return((ret < 0) ? ret : 0);

    /* NEW level-2 page table! Deal with every PDE in the table. */
    p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        l2_entry = *p_l2_entry++;
        if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue;
        if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
        {
            MEM_LOG("Bad L2 page type settings %04lx",
                    l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE));
            return(-1);
        }
        ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry));
        if ( ret ) return(ret);
        p_l2_entry = map_domain_mem((page_nr << PAGE_SHIFT) +
                                    ((i+1) * sizeof(l2_pgentry_t)));
    }

    /* Now we simply slap in our high mapping. */
    memcpy(p_l2_entry,
           idle_pg_table[smp_processor_id()] + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
    p_l2_entry[(PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT) -
               DOMAIN_ENTRIES_PER_L2_PAGETABLE] =
        mk_l2_pgentry(__pa(current->mm.perdomain_pt) | __PAGE_HYPERVISOR);

    return(ret);
}
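
/*
 * The PERDOMAIN_VIRT_START slot installed above maps each domain's private
 * current->mm.perdomain_pt page into every one of its L2 tables; presumably
 * this is where the per-domain GDT/LDT support named in this changeset will
 * hang its mappings.
 */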

static int get_l1_table(unsigned long page_nr)
{
    l1_pgentry_t *p_l1_entry, l1_entry;
    int i, ret;

    /* Update ref count for page pointed at by PDE. */
    ret = inc_page_refcnt(page_nr, PGT_l1_page_table);
    if ( ret != 0 ) return((ret < 0) ? ret : 0);

    /* NEW level-1 page table! Deal with every PTE in the table. */
    p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
    {
        l1_entry = *p_l1_entry++;
        if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue;
        if ( (l1_pgentry_val(l1_entry) &
              (_PAGE_GLOBAL|_PAGE_PAT)) )
        {
            MEM_LOG("Bad L1 page type settings %04lx",
                    l1_pgentry_val(l1_entry) &
                    (_PAGE_GLOBAL|_PAGE_PAT));
            return(-1);
        }
        ret = get_page(l1_pgentry_to_pagenr(l1_entry),
                       l1_pgentry_val(l1_entry) & _PAGE_RW);
        if ( ret ) return(ret);
    }

    return(ret);
}

static int get_page(unsigned long page_nr, int writeable)
{
    struct pfn_info *page;
    unsigned long flags;

    /* Update ref count for page pointed at by PTE. */
    if ( page_nr >= max_page )
    {
        MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
        return(-1);
    }
    page = frame_table + page_nr;
    flags = page->flags;
    if ( (flags & PG_domain_mask) != current->domain )
    {
        MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
        return(-1);
    }

    if ( writeable )
    {
        if ( (flags & PG_type_mask) != PGT_writeable_page )
        {
            if ( page_type_count(page) != 0 )
            {
                MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld",
                        flags & PG_type_mask, PGT_writeable_page,
                        page_type_count(page));
                return(-1);
            }
            page->flags |= PGT_writeable_page;
        }
        get_page_type(page);
    }

    get_page_tot(page);

    return(0);
}
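
/*
 * Note: only a writeable mapping takes a type reference in get_page();
 * a read-only mapping bumps tot_count alone, so it never prevents the frame
 * from (later) being validated as a page table.
 */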

static int put_l2_table(unsigned long page_nr)
{
    l2_pgentry_t *p_l2_entry, l2_entry;
    int i, ret;

    ret = dec_page_refcnt(page_nr, PGT_l2_page_table);
    if ( ret != 0 ) return((ret < 0) ? ret : 0);

    /* We had last reference to level-2 page table. Free the PDEs. */
    p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < HYPERVISOR_ENTRIES_PER_L2_PAGETABLE; i++ )
    {
        l2_entry = *p_l2_entry++;
        if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) )
        {
            put_l1_table(l2_pgentry_to_pagenr(l2_entry));
            p_l2_entry = map_domain_mem((page_nr << PAGE_SHIFT) +
                                        ((i+1) * sizeof(l2_pgentry_t)));
        }
    }

    return(0);
}

static void put_l1_table(unsigned long page_nr)
{
    l1_pgentry_t *p_l1_entry, l1_entry;
    int i;

    if ( dec_page_refcnt(page_nr, PGT_l1_page_table) != 0 ) return;

    /* We had last reference to level-1 page table. Free the PTEs. */
    p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
    for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
    {
        l1_entry = *p_l1_entry++;
        if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) )
        {
            put_page(l1_pgentry_to_pagenr(l1_entry),
                     l1_pgentry_val(l1_entry) & _PAGE_RW);
        }
    }
}

static void put_page(unsigned long page_nr, int writeable)
{
    struct pfn_info *page;
    ASSERT(page_nr < max_page);
    page = frame_table + page_nr;
    ASSERT((page->flags & PG_domain_mask) == current->domain);
    ASSERT((!writeable) ||
           ((page_type_count(page) != 0) &&
            ((page->flags & PG_type_mask) == PGT_writeable_page)));
    if ( writeable && (put_page_type(page) == 0) )
    {
        tlb_flush[smp_processor_id()] = 1;
        page->flags &= ~PG_type_mask;
    }
    put_page_tot(page);
}
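
/*
 * Note: dropping the final writeable reference above only *requests* a TLB
 * flush (tlb_flush[cpu] = 1); the CR3 reload that actually performs it is
 * deferred to the end of do_process_page_updates().
 */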

static int mod_l2_entry(unsigned long pa, l2_pgentry_t new_l2_entry)
{
    l2_pgentry_t *p_l2_entry, old_l2_entry;

    p_l2_entry = map_domain_mem(pa);
    old_l2_entry = *p_l2_entry;

    if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >=
         DOMAIN_ENTRIES_PER_L2_PAGETABLE )
    {
        MEM_LOG("Illegal L2 update attempt in hypervisor area %p",
                p_l2_entry);
        goto fail;
    }

    /*
     * Write the new value while pointer is still valid. The mapping cache
     * entry for p_l2_entry may get clobbered by {put,get}_l1_table.
     */
    *p_l2_entry = new_l2_entry;

    if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) )
    {
        if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
        {
            MEM_LOG("Bad L2 entry val %04lx",
                    l2_pgentry_val(new_l2_entry) &
                    (_PAGE_GLOBAL|_PAGE_PSE));
            goto fail;
        }
        /* Differ in mapping (bits 12-31) or presence (bit 0)? */
        if ( ((l2_pgentry_val(old_l2_entry) ^
               l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
        {
            if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
            {
                put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
            }

            if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) )
                goto fail;
        }
    }
    else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
    {
        put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
    }

    return(0);

 fail:
    /*
     * On failure we put the old value back. We need to regrab the
     * mapping of the physical page frame.
     */
    p_l2_entry = map_domain_mem(pa);
    *p_l2_entry = old_l2_entry;
    return(-1);
}
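
/*
 * Note: the RW bit of a PDE (bit 1) lies outside the 0xfffff001 mask above,
 * so toggling it is written through without re-counting anything. This is the
 * deliberate simplification described in "A further note on writeable page
 * mappings" at the top of this file.
 */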

static int mod_l1_entry(unsigned long pa, l1_pgentry_t new_l1_entry)
{
    l1_pgentry_t *p_l1_entry, old_l1_entry;

    p_l1_entry = map_domain_mem(pa);
    old_l1_entry = *p_l1_entry;

    if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) )
    {
        if ( (l1_pgentry_val(new_l1_entry) &
              (_PAGE_GLOBAL|_PAGE_PAT)) )
        {
            MEM_LOG("Bad L1 entry val %04lx",
                    l1_pgentry_val(new_l1_entry) &
                    (_PAGE_GLOBAL|_PAGE_PAT));
            goto fail;
        }
        /*
         * Differ in mapping (bits 12-31), writeable (bit 1), or
         * presence (bit 0)?
         */
        if ( ((l1_pgentry_val(old_l1_entry) ^
               l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
        {
            if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
            {
                put_page(l1_pgentry_to_pagenr(old_l1_entry),
                         l1_pgentry_val(old_l1_entry) & _PAGE_RW);
            }

            if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) )
                goto fail;
        }
    }
    else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
    {
        put_page(l1_pgentry_to_pagenr(old_l1_entry),
                 l1_pgentry_val(old_l1_entry) & _PAGE_RW);
    }

    /* p_l1_entry is still valid here */
    *p_l1_entry = new_l1_entry;

    return(0);
 fail:
    return(-1);
}
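
/*
 * Note: if old and new PTEs differ only in bits outside the 0xfffff003 mask
 * (accessed/dirty, cacheability, user/supervisor), the new value is simply
 * written at the end of mod_l1_entry() without touching any reference counts.
 */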

static int do_extended_command(unsigned long ptr, unsigned long val)
{
    int err = 0;
    unsigned long pfn = ptr >> PAGE_SHIFT;
    struct pfn_info *page = frame_table + pfn;

    switch ( (val & PGEXT_CMD_MASK) )
    {
    case PGEXT_PIN_L1_TABLE:
        err = get_l1_table(pfn);
        goto mark_as_pinned;
    case PGEXT_PIN_L2_TABLE:
        err = get_l2_table(pfn);
    mark_as_pinned:
        if ( err )
        {
            MEM_LOG("Error while pinning pfn %08lx", pfn);
            break;
        }
        put_page_type(page);
        put_page_tot(page);
        if ( !(page->type_count & REFCNT_PIN_BIT) )
        {
            page->type_count |= REFCNT_PIN_BIT;
            page->tot_count |= REFCNT_PIN_BIT;
        }
        else
        {
            MEM_LOG("Pfn %08lx already pinned", pfn);
            err = 1;
        }
        break;

    case PGEXT_UNPIN_TABLE:
        if ( (page->flags & PG_domain_mask) != current->domain )
        {
            err = 1;
            MEM_LOG("Page %08lx bad domain (dom=%ld)",
                    ptr, page->flags & PG_domain_mask);
        }
        else if ( (page->type_count & REFCNT_PIN_BIT) )
        {
            page->type_count &= ~REFCNT_PIN_BIT;
            page->tot_count &= ~REFCNT_PIN_BIT;
            get_page_type(page);
            get_page_tot(page);
            ((page->flags & PG_type_mask) == PGT_l1_page_table) ?
                put_l1_table(pfn) : put_l2_table(pfn);
        }
        else
        {
            err = 1;
            MEM_LOG("Pfn %08lx not pinned", pfn);
        }
        break;

    case PGEXT_NEW_BASEPTR:
        err = get_l2_table(pfn);
        if ( !err )
        {
            put_l2_table(pagetable_val(current->mm.pagetable) >> PAGE_SHIFT);
            current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
        }
        else
        {
            MEM_LOG("Error while installing new baseptr %08lx %d", ptr, err);
        }
        /* fall through */

    case PGEXT_TLB_FLUSH:
        tlb_flush[smp_processor_id()] = 1;
        break;

    case PGEXT_INVLPG:
        __flush_tlb_one(val & ~PGEXT_CMD_MASK);
        break;

    default:
        MEM_LOG("Invalid extended pt command 0x%08lx", val & PGEXT_CMD_MASK);
        err = 1;
        break;
    }

    return err;
}
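
/*
 * Note on the pin encoding in do_extended_command() above: pinning converts
 * the reference that get_l1_table()/get_l2_table() took on the pinned frame
 * itself into the REFCNT_PIN_BIT marker (the put_page_type/put_page_tot pair
 * drops the counted reference; the pin bit then keeps both counts non-zero).
 * References taken on lower-level tables and mapped frames during validation
 * are kept until the table is unpinned.
 */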

/* Apply a list of page-table updates on behalf of the current domain. */
int do_process_page_updates(page_update_request_t *updates, int count)
{
    page_update_request_t cur;
    unsigned long flags, pfn;
    struct pfn_info *page;
    int err = 0, i;

    for ( i = 0; i < count; i++ )
    {
        if ( copy_from_user(&cur, updates, sizeof(cur)) )
        {
            kill_domain_with_errmsg("Cannot read page update request");
        }

        pfn = cur.ptr >> PAGE_SHIFT;
        if ( pfn >= max_page )
        {
            MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
            kill_domain_with_errmsg("Page update request out of range");
        }

        err = 1;

        /* Least significant bits of 'ptr' demux the operation type. */
        switch ( cur.ptr & (sizeof(l1_pgentry_t)-1) )
        {

        /*
         * PGREQ_NORMAL: Normal update to any level of page table.
         */
        case PGREQ_NORMAL:
            page = frame_table + pfn;
            flags = page->flags;
            if ( (flags & PG_domain_mask) == current->domain )
            {
                switch ( (flags & PG_type_mask) )
                {
                case PGT_l1_page_table:
                    err = mod_l1_entry(cur.ptr, mk_l1_pgentry(cur.val));
                    break;
                case PGT_l2_page_table:
                    err = mod_l2_entry(cur.ptr, mk_l2_pgentry(cur.val));
                    break;
                default:
                    MEM_LOG("Update to non-pt page %08lx", cur.ptr);
                    break;
                }
            }
            break;

        /*
         * PGREQ_UNCHECKED_UPDATE: Make an unchecked update to a
         * bottom-level page-table entry.
         * Restrictions apply:
         *  1. Update only allowed by domain 0.
         *  2. Update must be to a level-1 pte belonging to dom0.
         */
        case PGREQ_UNCHECKED_UPDATE:
            cur.ptr &= ~(sizeof(l1_pgentry_t) - 1);
            page = frame_table + pfn;
            flags = page->flags;
            if ( (flags | current->domain) == PGT_l1_page_table )
            {
                *(unsigned long *)map_domain_mem(cur.ptr) = cur.val;
                err = 0;
            }
            else
            {
                MEM_LOG("UNCHECKED_UPDATE: Bad domain %d, or"
                        " bad pte type %08lx", current->domain, flags);
            }
            break;

        /*
         * PGREQ_EXTENDED_COMMAND: Extended command is specified
         * in the least-significant bits of the 'value' field.
         */
        case PGREQ_EXTENDED_COMMAND:
            cur.ptr &= ~(sizeof(l1_pgentry_t) - 1);
            err = do_extended_command(cur.ptr, cur.val);
            break;

        default:
            MEM_LOG("Invalid page update command %08lx", cur.ptr);
            break;
        }

        if ( err )
        {
            kill_domain_with_errmsg("Illegal page update request");
        }

        updates++;
    }

    if ( tlb_flush[smp_processor_id()] )
    {
        tlb_flush[smp_processor_id()] = 0;
        __asm__ __volatile__ (
            "movl %%eax,%%cr3" : :
            "a" (pagetable_val(current->mm.pagetable)));
    }

    return(0);
}