ia64/xen-unstable

view xen/common/memory.c @ 789:9c21e28c40a0

bitkeeper revision 1.479.1.1 (3f819a2fnjjfq6SkNEPt-IcLHQFovA)

entry.S, memory.c:
Fix bug which caused extra entries into Xenolinux event dispatcher.
author kaf24@scramble.cl.cam.ac.uk
date Mon Oct 06 16:37:03 2003 +0000 (2003-10-06)
parents 8e974c88fa7a
children 22057fc2142f 0fee6fe6955a
line source
1 /******************************************************************************
2 * memory.c
3 *
4 * Copyright (c) 2002 K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * A description of the page table API:
23 *
24 * Domains trap to do_process_page_updates() with a list of update requests.
25 * This is a list of (ptr, val) pairs, where the requested operation
26 * is *ptr = val.
27 *
28 * Reference counting of pages:
29 * ----------------------------
30 * Each page has two refcounts: tot_count and type_count.
31 *
32 * TOT_COUNT is the obvious reference count. It counts all uses of a
33 * physical page frame by a domain, including uses as a page directory,
34 * a page table, or simple mappings via a PTE. This count prevents a
35 * domain from releasing a frame back to the hypervisor's free pool when
36 * it is still referencing it!
37 *
38 * TYPE_COUNT is more subtle. A frame can be put to one of three
39 * mutually-exclusive uses: it might be used as a page directory, or a
40 * page table, or it may be mapped writeable by the domain [of course, a
41 * frame need not be used in any of these three ways at all!].
42 * So, type_count is a count of the number of times a frame is being
43 * referred to in its current incarnation. Therefore, a page can only
44 * change its type when its type count is zero.
45 *
46 * Pinning the page type:
47 * ----------------------
48 * The type of a page can be pinned/unpinned with the commands
49 * PGEXT_PIN_L?_TABLE and PGEXT_UNPIN_TABLE. Each page can be pinned exactly
50 * once (that is, pinning is not reference counted, so it can't be nested).
51 * This is useful for preventing a page's type count from falling to zero, at
52 * which point safety checks would need to be carried out the next time the
53 * count is increased again.
54 *
55 * A further note on writeable page mappings:
56 * ------------------------------------------
57 * For simplicity, the count of writeable mappings for a page may not
58 * correspond to reality. The 'writeable count' is incremented for every
59 * PTE which maps the page with the _PAGE_RW flag set. However, for
60 * write access to be possible the page directory entry must also have
61 * its _PAGE_RW bit set. We do not check this as it complicates the
62 * reference counting considerably [consider the case of multiple
63 * directory entries referencing a single page table, some with the RW
64 * bit set, others not -- it starts getting a bit messy].
65 * In normal use, this simplification shouldn't be a problem.
66 * However, the logic can be added if required.
67 *
68 * One more note on read-only page mappings:
69 * -----------------------------------------
70 * We want domains to be able to map pages for read-only access. The
71 * main reason is that page tables and directories should be readable
72 * by a domain, but it would not be safe for them to be writeable.
73 * However, domains have free access to rings 1 & 2 of the Intel
74 * privilege model. In terms of page protection, these are considered
75 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
76 * read-only restrictions are respected in supervisor mode -- if the
77 * bit is clear then any mapped page is writeable.
78 *
79 * We get round this by always setting the WP bit and disallowing
80 * updates to it. This is very unlikely to cause a problem for guest
81 * OSes, which will generally use the WP bit to simplify copy-on-write
82 * implementation (in that case, OS wants a fault when it writes to
83 * an application-supplied buffer).
84 */
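To make the interface described above concrete, the sketch below shows how a guest might encode a batch of two update requests: one normal PTE write and one extended command that pins an L2 table. The request layout (page_update_request_t with 'ptr' and 'val' fields) and the PGREQ_*/PGEXT_* command codes are the ones used by do_process_page_updates() below; the pt_update() hypercall wrapper and the machine addresses are purely illustrative.

/* Guest-side sketch (illustrative only; pt_update() is a hypothetical wrapper). */
page_update_request_t reqs[2];

/* Normal update: *pte_maddr = new_pte_val. The operation type lives in the
 * low bits of 'ptr' (see do_process_page_updates() below). */
reqs[0].ptr = pte_maddr | PGREQ_NORMAL;
reqs[0].val = new_pte_val;

/* Extended command: pin the L2 table at l2_maddr. For extended commands the
 * command code lives in the low bits of 'val' (see do_extended_command()). */
reqs[1].ptr = l2_maddr | PGREQ_EXTENDED_COMMAND;
reqs[1].val = PGEXT_PIN_L2_TABLE;

pt_update(reqs, 2);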
87 /*
88 * THE FOLLOWING ARE ISSUES IF GUEST OPERATING SYSTEMS BECOME SMP-CAPABLE.
89 * -----------------------------------------------------------------------
90 *
91 * *********
92 * UPDATE 15/7/02: The interface has changed -- updates now specify the
93 * physical address of the page-table entry, rather than a virtual address,
94 * so the hypervisor no longer "walks" the page tables. Therefore the
95 * solution below cannot work. Another possibility is to add a new entry
96 * to our "struct page" which says to which top-level page table each
97 * lower-level page table or writeable mapping belongs. If it belongs to more
98 * than one, we'd probably just flush on all processors running the domain.
99 * *********
100 *
101 * The problem involves creating new page tables which might be mapped
102 * writeable in the TLB of another processor. As an example, a domain might be
103 * running in two contexts (i.e. on two processors) simultaneously, using the
104 * same top-level page table in both contexts. Now, if context 1 sends an
105 * update request [make page P read-only, add a reference to page P as a page
106 * table], that will succeed if there was only one writeable mapping of P.
107 * However, that mapping may persist in the TLB of context 2.
108 *
109 * Solution: when installing a new page table, we must flush foreign TLBs as
110 * necessary. The naive solution is to flush on any processor running our
111 * domain. A cleverer solution is to flush on any processor running the same
112 * top-level page table, but this will sometimes fail (consider two different
113 * top-level page tables which have a shared lower-level page table).
114 *
115 * A better solution: when squashing a write reference, check how many times
116 * that lowest-level table entry is referenced by ORing refcounts of tables
117 * down the page-table hierarchy. If the result is != 1, we must flush all
118 * instances of the current domain when a new table is installed (because the
119 * lowest-level entry may be referenced by many top-level page tables).
120 * However, the common case will be result == 1, so we only need to flush
121 * processors with the same top-level page table. Make the choice at
122 * table-installation time based on a `flush_level' flag, which is
123 * FLUSH_NONE, FLUSH_PAGETABLE, FLUSH_DOMAIN. A flush reduces this
124 * to FLUSH_NONE, while squashed write mappings can only promote up
125 * to more aggressive flush types.
126 */
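The `flush_level' scheme sketched in the comment above is a design note rather than something implemented in this file. A minimal sketch of the intended promotion rule, using only the names from the comment (everything else is illustrative):

/* Illustrative only: the flush_level promotion described above. */
typedef enum { FLUSH_NONE = 0, FLUSH_PAGETABLE = 1, FLUSH_DOMAIN = 2 } flush_level_t;

static flush_level_t flush_level = FLUSH_NONE;

/* Squashing a write mapping may only make the pending flush more aggressive. */
static void promote_flush_level(flush_level_t required)
{
    if ( required > flush_level )
        flush_level = required;
}

/* Performing the flush resets the state back to FLUSH_NONE. */
static void do_deferred_flush(void)
{
    /* ...flush TLBs on the processors selected by flush_level... */
    flush_level = FLUSH_NONE;
}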
128 #include <xeno/config.h>
129 #include <xeno/init.h>
130 #include <xeno/lib.h>
131 #include <xeno/mm.h>
132 #include <xeno/sched.h>
133 #include <xeno/errno.h>
134 #include <xeno/perfc.h>
135 #include <asm/page.h>
136 #include <asm/flushtlb.h>
137 #include <asm/io.h>
138 #include <asm/uaccess.h>
139 #include <asm/domain_page.h>
141 #if 0
142 #define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
143 #else
144 #define MEM_LOG(_f, _a...) ((void)0)
145 #endif
147 /* Domain 0 is allowed to submit requests on behalf of others. */
148 #define DOMAIN_OKAY(_f) \
149 ((((_f) & PG_domain_mask) == current->domain) || (current->domain == 0))
151 /* 'get' checks parameter for validity before inc'ing refcnt. */
152 static int get_l2_table(unsigned long page_nr);
153 static int get_l1_table(unsigned long page_nr);
154 static int get_page(unsigned long page_nr, int writeable);
155 static int inc_page_refcnt(unsigned long page_nr, unsigned int type);
156 /* 'put' does no checking because if refcnt not zero, entity must be valid. */
157 static void put_l2_table(unsigned long page_nr);
158 static void put_l1_table(unsigned long page_nr);
159 static void put_page(unsigned long page_nr, int writeable);
160 static int dec_page_refcnt(unsigned long page_nr, unsigned int type);
162 static int mod_l2_entry(unsigned long, l2_pgentry_t);
163 static int mod_l1_entry(unsigned long, l1_pgentry_t);
165 /* The frame table, its size in bytes, and the total number of machine pages. */
166 frame_table_t * frame_table;
167 unsigned long frame_table_size;
168 unsigned long max_page;
170 struct list_head free_list;
171 spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
172 unsigned int free_pfns;
174 /* Used to defer flushing of memory structures. */
175 static int flush_tlb[NR_CPUS] __cacheline_aligned;
178 /*
179 * init_frametable:
180 * Initialise per-frame memory information. This goes directly after
181 * MAX_MONITOR_ADDRESS in physical memory.
182 */
183 void __init init_frametable(unsigned long nr_pages)
184 {
185 struct pfn_info *pf;
186 unsigned long page_index;
187 unsigned long flags;
189 memset(flush_tlb, 0, sizeof(flush_tlb));
191 max_page = nr_pages;
192 frame_table_size = nr_pages * sizeof(struct pfn_info);
193 frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK;
194 frame_table = (frame_table_t *)FRAMETABLE_VIRT_START;
195 memset(frame_table, 0, frame_table_size);
197 free_pfns = 0;
199 /* Put all domain-allocatable memory on a free list. */
200 spin_lock_irqsave(&free_list_lock, flags);
201 INIT_LIST_HEAD(&free_list);
202 for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT;
203 page_index < nr_pages;
204 page_index++ )
205 {
206 pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
207 list_add_tail(&pf->list, &free_list);
208 free_pfns++;
209 }
210 spin_unlock_irqrestore(&free_list_lock, flags);
211 }
214 static void __invalidate_shadow_ldt(void)
215 {
216 int i;
217 unsigned long pfn;
218 struct pfn_info *page;
220 current->mm.shadow_ldt_mapcnt = 0;
222 for ( i = 16; i < 32; i++ )
223 {
224 pfn = l1_pgentry_to_pagenr(current->mm.perdomain_pt[i]);
225 if ( pfn == 0 ) continue;
226 current->mm.perdomain_pt[i] = mk_l1_pgentry(0);
227 page = frame_table + pfn;
228 ASSERT((page->flags & PG_type_mask) == PGT_ldt_page);
229 ASSERT((page->flags & PG_domain_mask) == current->domain);
230 ASSERT((page->type_count != 0) && (page->tot_count != 0));
231 put_page_type(page);
232 put_page_tot(page);
233 }
234 }
235 static inline void invalidate_shadow_ldt(void)
236 {
237 if ( current->mm.shadow_ldt_mapcnt != 0 )
238 __invalidate_shadow_ldt();
239 }
242 /* Return original refcnt, or -1 on error. */
243 static int inc_page_refcnt(unsigned long page_nr, unsigned int type)
244 {
245 struct pfn_info *page;
246 unsigned long flags;
248 if ( page_nr >= max_page )
249 {
250 MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
251 return -1;
252 }
253 page = frame_table + page_nr;
254 flags = page->flags;
255 if ( !DOMAIN_OKAY(flags) )
256 {
257 MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
258 return -1;
259 }
260 if ( (flags & PG_type_mask) != type )
261 {
262 if ( page_type_count(page) != 0 )
263 {
264 MEM_LOG("Page %08lx bad type/count (%08lx!=%08x) cnt=%ld",
265 page_nr << PAGE_SHIFT,
266 flags & PG_type_mask, type, page_type_count(page));
267 return -1;
268 }
270 if ( flags & PG_need_flush )
271 {
272 flush_tlb[smp_processor_id()] = 1;
273 page->flags &= ~PG_need_flush;
274 perfc_incrc(need_flush_tlb_flush);
275 }
277 page->flags &= ~PG_type_mask;
278 page->flags |= type;
279 }
281 get_page_tot(page);
282 return get_page_type(page);
283 }
286 /* Return new refcnt, or -1 on error. */
287 static int dec_page_refcnt(unsigned long page_nr, unsigned int type)
288 {
289 struct pfn_info *page;
291 if ( page_nr >= max_page )
292 {
293 MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
294 return -1;
295 }
296 page = frame_table + page_nr;
297 if ( !DOMAIN_OKAY(page->flags) ||
298 ((page->flags & PG_type_mask) != type) )
299 {
300 MEM_LOG("Bad page type/domain (dom=%ld) (type %ld != expected %d)",
301 page->flags & PG_domain_mask, page->flags & PG_type_mask,
302 type);
303 return -1;
304 }
305 ASSERT(page_type_count(page) != 0);
306 put_page_tot(page);
307 return put_page_type(page);
308 }
311 /* We allow an L2 table to map itself, to achieve a linear pagetable. */
312 /* NB. There's no need for a put_twisted_l2_table() function!! */
313 static int get_twisted_l2_table(unsigned long entry_pfn, l2_pgentry_t l2e)
314 {
315 unsigned long l2v = l2_pgentry_val(l2e);
317 /* Clearly the mapping must be read-only :-) */
318 if ( (l2v & _PAGE_RW) )
319 {
320 MEM_LOG("Attempt to install twisted L2 entry with write permissions");
321 return -1;
322 }
324 /* This is a sufficient final check. */
325 if ( (l2v >> PAGE_SHIFT) != entry_pfn )
326 {
327 MEM_LOG("L2 tables may not map _other_ L2 tables!\n");
328 return -1;
329 }
331 /* We don't bump the reference counts. */
332 return 0;
333 }
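As a concrete example of the 'twisted' (linear-pagetable) case handled above, a guest could install a read-only L2 entry that points back at the L2 page itself. The LINEAR_SLOT index and the pt_update() wrapper are illustrative; the read-only requirement and the self-reference check come from get_twisted_l2_table() above.

/* Guest-side sketch: self-map the L2 table so that page-table pages appear in
 * a linear window. The entry must be read-only and must point at the L2 page
 * that contains it; LINEAR_SLOT must be below DOMAIN_ENTRIES_PER_L2_PAGETABLE. */
page_update_request_t req;
req.ptr = (l2_maddr + LINEAR_SLOT * sizeof(l2_pgentry_t)) | PGREQ_NORMAL;
req.val = l2_maddr | _PAGE_PRESENT;   /* note: _PAGE_RW deliberately clear */
pt_update(&req, 1);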
336 static int get_l2_table(unsigned long page_nr)
337 {
338 l2_pgentry_t *p_l2_entry, l2_entry;
339 int i, ret=0;
341 ret = inc_page_refcnt(page_nr, PGT_l2_page_table);
342 if ( ret != 0 ) return (ret < 0) ? ret : 0;
344 /* NEW level-2 page table! Deal with every PDE in the table. */
345 p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
346 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
347 {
348 l2_entry = *p_l2_entry++;
349 if ( !(l2_pgentry_val(l2_entry) & _PAGE_PRESENT) ) continue;
350 if ( (l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
351 {
352 MEM_LOG("Bad L2 page type settings %04lx",
353 l2_pgentry_val(l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE));
354 ret = -1;
355 goto out;
356 }
357 /* Assume we're mapping an L1 table, falling back to twisted L2. */
358 ret = get_l1_table(l2_pgentry_to_pagenr(l2_entry));
359 if ( ret ) ret = get_twisted_l2_table(page_nr, l2_entry);
360 if ( ret ) goto out;
361 }
363 /* Now we simply slap in our high mapping. */
364 memcpy(p_l2_entry,
365 idle_pg_table[smp_processor_id()] + DOMAIN_ENTRIES_PER_L2_PAGETABLE,
366 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
367 p_l2_entry[(PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT) -
368 DOMAIN_ENTRIES_PER_L2_PAGETABLE] =
369 mk_l2_pgentry(__pa(current->mm.perdomain_pt) | __PAGE_HYPERVISOR);
371 out:
372 unmap_domain_mem(p_l2_entry);
373 return ret;
374 }
377 static int get_l1_table(unsigned long page_nr)
378 {
379 l1_pgentry_t *p_l1_entry, l1_entry;
380 int i, ret;
382 /* Update ref count for page pointed at by PDE. */
383 ret = inc_page_refcnt(page_nr, PGT_l1_page_table);
384 if ( ret != 0 ) return (ret < 0) ? ret : 0;
386 /* NEW level-1 page table! Deal with every PTE in the table. */
387 p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
388 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
389 {
390 l1_entry = *p_l1_entry++;
391 if ( !(l1_pgentry_val(l1_entry) & _PAGE_PRESENT) ) continue;
392 if ( (l1_pgentry_val(l1_entry) &
393 (_PAGE_GLOBAL|_PAGE_PAT)) )
394 {
395 MEM_LOG("Bad L1 page type settings %04lx",
396 l1_pgentry_val(l1_entry) &
397 (_PAGE_GLOBAL|_PAGE_PAT));
398 ret = -1;
399 goto out;
400 }
401 ret = get_page(l1_pgentry_to_pagenr(l1_entry),
402 l1_pgentry_val(l1_entry) & _PAGE_RW);
403 if ( ret ) goto out;
404 }
406 out:
407 /* Make sure we unmap the right page! */
408 unmap_domain_mem(p_l1_entry-1);
409 return ret;
410 }
413 static int get_page(unsigned long page_nr, int writeable)
414 {
415 struct pfn_info *page;
416 unsigned long flags;
418 /* Update ref count for page pointed at by PTE. */
419 if ( page_nr >= max_page )
420 {
421 MEM_LOG("Page out of range (%08lx>%08lx)", page_nr, max_page);
422 return(-1);
423 }
424 page = frame_table + page_nr;
425 flags = page->flags;
426 if ( !DOMAIN_OKAY(flags) )
427 {
428 MEM_LOG("Bad page domain (%ld)", flags & PG_domain_mask);
429 return(-1);
430 }
432 if ( writeable )
433 {
434 if ( (flags & PG_type_mask) != PGT_writeable_page )
435 {
436 if ( page_type_count(page) != 0 )
437 {
438 MEM_LOG("Bad page type/count (%08lx!=%08x) cnt=%ld",
439 flags & PG_type_mask, PGT_writeable_page,
440 page_type_count(page));
441 return(-1);
442 }
443 page->flags &= ~PG_type_mask;
444 page->flags |= PGT_writeable_page;
445 }
446 page->flags |= PG_need_flush;
447 get_page_type(page);
448 }
450 get_page_tot(page);
452 return(0);
453 }
456 static void put_l2_table(unsigned long page_nr)
457 {
458 l2_pgentry_t *p_l2_entry, l2_entry;
459 int i;
461 if ( dec_page_refcnt(page_nr, PGT_l2_page_table) ) return;
463 /* We had last reference to level-2 page table. Free the PDEs. */
464 p_l2_entry = map_domain_mem(page_nr << PAGE_SHIFT);
465 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
466 {
467 l2_entry = *p_l2_entry++;
468 if ( (l2_pgentry_val(l2_entry) & _PAGE_PRESENT) )
469 put_l1_table(l2_pgentry_to_pagenr(l2_entry));
470 }
472 unmap_domain_mem(p_l2_entry);
473 }
476 static void put_l1_table(unsigned long page_nr)
477 {
478 l1_pgentry_t *p_l1_entry, l1_entry;
479 int i;
481 if ( dec_page_refcnt(page_nr, PGT_l1_page_table) ) return;
483 /* We had last reference to level-1 page table. Free the PTEs. */
484 p_l1_entry = map_domain_mem(page_nr << PAGE_SHIFT);
485 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
486 {
487 l1_entry = *p_l1_entry++;
488 if ( (l1_pgentry_val(l1_entry) & _PAGE_PRESENT) )
489 {
490 put_page(l1_pgentry_to_pagenr(l1_entry),
491 l1_pgentry_val(l1_entry) & _PAGE_RW);
492 }
493 }
495 /* Make sure we unmap the right page! */
496 unmap_domain_mem(p_l1_entry-1);
497 }
500 static void put_page(unsigned long page_nr, int writeable)
501 {
502 struct pfn_info *page;
503 ASSERT(page_nr < max_page);
504 page = frame_table + page_nr;
505 ASSERT(DOMAIN_OKAY(page->flags));
506 ASSERT((!writeable) ||
507 ((page_type_count(page) != 0) &&
508 ((page->flags & PG_type_mask) == PGT_writeable_page) &&
509 ((page->flags & PG_need_flush) == PG_need_flush)));
510 if ( writeable )
511 {
512 put_page_type(page);
513 }
514 else if ( unlikely(((page->flags & PG_type_mask) == PGT_ldt_page) &&
515 (page_type_count(page) != 0)) )
516 {
517 /* We expect this to be rare, so we just blow away the entire shadow LDT. */
518 invalidate_shadow_ldt();
519 }
520 put_page_tot(page);
521 }
524 static int mod_l2_entry(unsigned long pa, l2_pgentry_t new_l2_entry)
525 {
526 l2_pgentry_t *p_l2_entry, old_l2_entry;
528 p_l2_entry = map_domain_mem(pa);
529 old_l2_entry = *p_l2_entry;
531 if ( (((unsigned long)p_l2_entry & (PAGE_SIZE-1)) >> 2) >=
532 DOMAIN_ENTRIES_PER_L2_PAGETABLE )
533 {
534 MEM_LOG("Illegal L2 update attempt in hypervisor area %p",
535 p_l2_entry);
536 goto fail;
537 }
539 if ( (l2_pgentry_val(new_l2_entry) & _PAGE_PRESENT) )
540 {
541 if ( (l2_pgentry_val(new_l2_entry) & (_PAGE_GLOBAL|_PAGE_PSE)) )
542 {
543 MEM_LOG("Bad L2 entry val %04lx",
544 l2_pgentry_val(new_l2_entry) &
545 (_PAGE_GLOBAL|_PAGE_PSE));
546 goto fail;
547 }
548 /* Differ in mapping (bits 12-31) or presence (bit 0)? */
549 if ( ((l2_pgentry_val(old_l2_entry) ^
550 l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
551 {
552 if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
553 {
554 put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
555 }
557 /* Assume we're mapping an L1 table, falling back to twisted L2. */
558 if ( get_l1_table(l2_pgentry_to_pagenr(new_l2_entry)) &&
559 get_twisted_l2_table(pa >> PAGE_SHIFT, new_l2_entry) )
560 goto fail;
561 }
562 }
563 else if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) )
564 {
565 put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
566 }
568 *p_l2_entry = new_l2_entry;
569 unmap_domain_mem(p_l2_entry);
570 return 0;
572 fail:
573 unmap_domain_mem(p_l2_entry);
574 return -1;
575 }
578 static int mod_l1_entry(unsigned long pa, l1_pgentry_t new_l1_entry)
579 {
580 l1_pgentry_t *p_l1_entry, old_l1_entry;
582 p_l1_entry = map_domain_mem(pa);
583 old_l1_entry = *p_l1_entry;
585 if ( (l1_pgentry_val(new_l1_entry) & _PAGE_PRESENT) )
586 {
587 if ( (l1_pgentry_val(new_l1_entry) &
588 (_PAGE_GLOBAL|_PAGE_PAT)) )
589 {
591 MEM_LOG("Bad L1 entry val %04lx",
592 l1_pgentry_val(new_l1_entry) &
593 (_PAGE_GLOBAL|_PAGE_PAT));
594 goto fail;
595 }
596 /*
597 * Differ in mapping (bits 12-31), writeable (bit 1), or
598 * presence (bit 0)?
599 */
600 if ( ((l1_pgentry_val(old_l1_entry) ^
601 l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
602 {
603 if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
604 {
605 put_page(l1_pgentry_to_pagenr(old_l1_entry),
606 l1_pgentry_val(old_l1_entry) & _PAGE_RW);
607 }
609 if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
610 l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){
611 goto fail;
612 }
613 }
614 }
615 else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
616 {
617 put_page(l1_pgentry_to_pagenr(old_l1_entry),
618 l1_pgentry_val(old_l1_entry) & _PAGE_RW);
619 }
621 *p_l1_entry = new_l1_entry;
622 unmap_domain_mem(p_l1_entry);
623 return 0;
625 fail:
626 unmap_domain_mem(p_l1_entry);
627 return -1;
628 }
631 static int do_extended_command(unsigned long ptr, unsigned long val)
632 {
633 int err = 0;
634 unsigned int cmd = val & PGEXT_CMD_MASK;
635 unsigned long pfn = ptr >> PAGE_SHIFT;
636 struct pfn_info *page = frame_table + pfn;
638 /* 'ptr' must be in range, except for commands in which it isn't a machine address. */
639 if ( (pfn >= max_page) && (cmd != PGEXT_SET_LDT) )
640 return 1;
642 switch ( cmd )
643 {
644 case PGEXT_PIN_L1_TABLE:
645 err = get_l1_table(pfn);
646 goto mark_as_pinned;
647 case PGEXT_PIN_L2_TABLE:
648 err = get_l2_table(pfn);
649 mark_as_pinned:
650 if ( err )
651 {
652 MEM_LOG("Error while pinning pfn %08lx", pfn);
653 break;
654 }
655 put_page_type(page);
656 put_page_tot(page);
657 if ( !(page->type_count & REFCNT_PIN_BIT) )
658 {
659 page->type_count |= REFCNT_PIN_BIT;
660 page->tot_count |= REFCNT_PIN_BIT;
661 }
662 else
663 {
664 MEM_LOG("Pfn %08lx already pinned", pfn);
665 err = 1;
666 }
667 break;
669 case PGEXT_UNPIN_TABLE:
670 if ( !DOMAIN_OKAY(page->flags) )
671 {
672 err = 1;
673 MEM_LOG("Page %08lx bad domain (dom=%ld)",
674 ptr, page->flags & PG_domain_mask);
675 }
676 else if ( (page->type_count & REFCNT_PIN_BIT) )
677 {
678 page->type_count &= ~REFCNT_PIN_BIT;
679 page->tot_count &= ~REFCNT_PIN_BIT;
680 get_page_type(page);
681 get_page_tot(page);
682 ((page->flags & PG_type_mask) == PGT_l1_page_table) ?
683 put_l1_table(pfn) : put_l2_table(pfn);
684 }
685 else
686 {
687 err = 1;
688 MEM_LOG("Pfn %08lx not pinned", pfn);
689 }
690 break;
692 case PGEXT_NEW_BASEPTR:
693 err = get_l2_table(pfn);
694 if ( !err )
695 {
696 put_l2_table(pagetable_val(current->mm.pagetable) >> PAGE_SHIFT);
697 current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
698 invalidate_shadow_ldt();
699 flush_tlb[smp_processor_id()] = 1;
700 }
701 else
702 {
703 MEM_LOG("Error while installing new baseptr %08lx %d", ptr, err);
704 }
705 break;
707 case PGEXT_TLB_FLUSH:
708 flush_tlb[smp_processor_id()] = 1;
709 break;
711 case PGEXT_INVLPG:
712 __flush_tlb_one(val & ~PGEXT_CMD_MASK);
713 break;
715 case PGEXT_SET_LDT:
716 {
717 unsigned long ents = val >> PGEXT_CMD_SHIFT;
718 if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
719 (ents > 8192) ||
720 ((ptr+ents*LDT_ENTRY_SIZE) < ptr) ||
721 ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) )
722 {
723 err = 1;
724 MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents);
725 }
726 else if ( (current->mm.ldt_ents != ents) ||
727 (current->mm.ldt_base != ptr) )
728 {
729 if ( current->mm.ldt_ents != 0 )
730 {
731 invalidate_shadow_ldt();
732 flush_tlb[smp_processor_id()] = 1;
733 }
734 current->mm.ldt_base = ptr;
735 current->mm.ldt_ents = ents;
736 load_LDT();
737 }
738 break;
739 }
741 default:
742 MEM_LOG("Invalid extended pt command 0x%08lx", val & PGEXT_CMD_MASK);
743 err = 1;
744 break;
745 }
747 return err;
748 }
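As a usage illustration for the extended commands handled above, a guest might install a one-page LDT of 512 entries as follows. The pt_update() wrapper and the ldt_base value are illustrative; the encoding (page-aligned base in 'ptr', entry count above PGEXT_CMD_SHIFT in 'val') follows the PGEXT_SET_LDT case above.

/* Guest-side sketch: point the LDT at a page-aligned buffer of 512 entries,
 * which must lie below PAGE_OFFSET. */
page_update_request_t req;
req.ptr = ldt_base | PGREQ_EXTENDED_COMMAND;
req.val = (512UL << PGEXT_CMD_SHIFT) | PGEXT_SET_LDT;
pt_update(&req, 1);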
751 int do_process_page_updates(page_update_request_t *ureqs, int count)
752 {
753 page_update_request_t req;
754 unsigned long flags, pfn, *ptr;
755 struct pfn_info *page;
756 int err = 0, i;
757 unsigned int cmd;
759 for ( i = 0; i < count; i++ )
760 {
761 if ( copy_from_user(&req, ureqs, sizeof(req)) )
762 {
763 kill_domain_with_errmsg("Cannot read page update request");
764 }
766 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
768 /* All normal commands must have 'ptr' in range. */
769 pfn = req.ptr >> PAGE_SHIFT;
770 if ( (pfn >= max_page) && (cmd != PGREQ_EXTENDED_COMMAND) )
771 {
772 MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
773 kill_domain_with_errmsg("Page update request out of range");
774 }
776 err = 1;
778 /* Least significant bits of 'ptr' demux the operation type. */
779 spin_lock_irq(&current->page_lock);
780 switch ( cmd )
781 {
782 /*
783 * PGREQ_NORMAL: Normal update to any level of page table.
784 */
785 case PGREQ_NORMAL:
786 page = frame_table + pfn;
787 flags = page->flags;
789 if ( DOMAIN_OKAY(flags) )
790 {
791 switch ( (flags & PG_type_mask) )
792 {
793 case PGT_l1_page_table:
794 err = mod_l1_entry(req.ptr, mk_l1_pgentry(req.val));
795 break;
796 case PGT_l2_page_table:
797 err = mod_l2_entry(req.ptr, mk_l2_pgentry(req.val));
798 break;
799 default:
800 MEM_LOG("Update to non-pt page %08lx", req.ptr);
801 ptr = map_domain_mem(req.ptr);
802 *ptr = req.val;
803 unmap_domain_mem(ptr);
804 err = 0;
805 break;
806 }
807 }
808 else
809 {
810 MEM_LOG("Bad domain normal update (dom %d, pfn %ld)",
811 current->domain, pfn);
812 }
813 break;
815 case PGREQ_MPT_UPDATE:
816 page = frame_table + pfn;
817 if ( DOMAIN_OKAY(page->flags) )
818 {
819 machine_to_phys_mapping[pfn] = req.val;
820 err = 0;
821 }
822 else
823 {
824 MEM_LOG("Bad domain MPT update (dom %d, pfn %ld)",
825 current->domain, pfn);
826 }
827 break;
829 /*
830 * PGREQ_EXTENDED_COMMAND: Extended command is specified
831 * in the least-significant bits of the 'value' field.
832 */
833 case PGREQ_EXTENDED_COMMAND:
834 req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
835 err = do_extended_command(req.ptr, req.val);
836 break;
838 case PGREQ_UNCHECKED_UPDATE:
839 req.ptr &= ~(sizeof(l1_pgentry_t) - 1);
840 if ( current->domain == 0 )
841 {
842 ptr = map_domain_mem(req.ptr);
843 *ptr = req.val;
844 unmap_domain_mem(ptr);
845 err = 0;
846 }
847 else
848 {
849 MEM_LOG("Bad unchecked update attempt");
850 }
851 break;
853 default:
854 MEM_LOG("Invalid page update command %08lx", req.ptr);
855 break;
856 }
857 spin_unlock_irq(&current->page_lock);
859 if ( err )
860 {
861 kill_domain_with_errmsg("Illegal page update request");
862 }
864 ureqs++;
865 }
867 if ( flush_tlb[smp_processor_id()] )
868 {
869 flush_tlb[smp_processor_id()] = 0;
870 __write_cr3_counted(pagetable_val(current->mm.pagetable));
872 }
874 return 0;
875 }