ia64/xen-unstable: xen/arch/x86/mm.c @ 8431:8b322047c80f

Remove unused local variables (from removed REASSIGN_PAGE function).

Signed-off-by: Keir Fraser <keir@xensource.com>
author:   kaf24@firebug.cl.cam.ac.uk
date:     Wed Dec 21 20:17:22 2005 +0100
parents:  a4de51a2629f
children: d966b7a00959
1 /******************************************************************************
2 * arch/x86/mm.c
3 *
4 * Copyright (c) 2002-2005 K A Fraser
5 * Copyright (c) 2004 Christian Limpach
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * A description of the x86 page table API:
24 *
25 * Domains trap to do_mmu_update with a list of update requests.
26 * This is a list of (ptr, val) pairs, where the requested operation
27 * is *ptr = val.
28 *
29 * Reference counting of pages:
30 * ----------------------------
31 * Each page has two refcounts: tot_count and type_count.
32 *
33 * TOT_COUNT is the obvious reference count. It counts all uses of a
34 * physical page frame by a domain, including uses as a page directory,
35 * a page table, or simple mappings via a PTE. This count prevents a
36 * domain from releasing a frame back to the free pool when it still holds
37 * a reference to it.
38 *
39 * TYPE_COUNT is more subtle. A frame can be put to one of three
40 * mutually-exclusive uses: it might be used as a page directory, or a
41 * page table, or it may be mapped writable by the domain [of course, a
42 * frame may not be used in any of these three ways!].
43 * So, type_count is a count of the number of times a frame is being
44 * referred to in its current incarnation. Therefore, a page can only
45 * change its type when its type count is zero.
46 *
47 * Pinning the page type:
48 * ----------------------
49 * The type of a page can be pinned/unpinned with the commands
50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
51 * pinning is not reference counted, so it can't be nested).
52 * This is useful to prevent a page's type count falling to zero, at which
53 * point safety checks would need to be carried out next time the count
54 * is increased again.
55 *
56 * A further note on writable page mappings:
57 * -----------------------------------------
58 * For simplicity, the count of writable mappings for a page may not
59 * correspond to reality. The 'writable count' is incremented for every
60 * PTE which maps the page with the _PAGE_RW flag set. However, for
61 * write access to be possible the page directory entry must also have
62 * its _PAGE_RW bit set. We do not check this as it complicates the
63 * reference counting considerably [consider the case of multiple
64 * directory entries referencing a single page table, some with the RW
65 * bit set, others not -- it starts getting a bit messy].
66 * In normal use, this simplification shouldn't be a problem.
67 * However, the logic can be added if required.
68 *
69 * One more note on read-only page mappings:
70 * -----------------------------------------
71 * We want domains to be able to map pages for read-only access. The
72 * main reason is that page tables and directories should be readable
73 * by a domain, but it would not be safe for them to be writable.
74 * However, domains have free access to rings 1 & 2 of the Intel
75 * privilege model. In terms of page protection, these are considered
76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
77 * read-only restrictions are respected in supervisor mode -- if the
78 * bit is clear then any mapped page is writable.
79 *
80 * We get round this by always setting the WP bit and disallowing
81 * updates to it. This is very unlikely to cause a problem for guest
82 * OS's, which will generally use the WP bit to simplify copy-on-write
83 * implementation (in that case, OS wants a fault when it writes to
84 * an application-supplied buffer).
85 */
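To make the (ptr, val) interface described above concrete, here is a minimal guest-side sketch of batching a single page-table update through do_mmu_update(). It assumes the usual guest hypercall wrapper HYPERVISOR_mmu_update() (as in the Linux paravirtual port); the function name and both arguments are placeholders, not part of this file.

/*
 * Illustration only (not part of mm.c proper): a guest batching one
 * normal page-table update. HYPERVISOR_mmu_update() is assumed to be
 * the guest's hypercall wrapper; pte_maddr and new_val are placeholders.
 */
static int example_single_pte_update(unsigned long pte_maddr,
                                     unsigned long new_val)
{
    mmu_update_t req;
    unsigned int done = 0;

    /* The requested operation is *ptr = val; the command is encoded in
     * the low bits of ptr (MMU_NORMAL_PT_UPDATE for an ordinary write). */
    req.ptr = pte_maddr | MMU_NORMAL_PT_UPDATE;
    req.val = new_val;

    /* One request, successes reported via 'done', acting on our own tables. */
    return HYPERVISOR_mmu_update(&req, 1, &done, DOMID_SELF);
}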
87 #include <xen/config.h>
88 #include <xen/init.h>
89 #include <xen/kernel.h>
90 #include <xen/lib.h>
91 #include <xen/mm.h>
92 #include <xen/sched.h>
93 #include <xen/errno.h>
94 #include <xen/perfc.h>
95 #include <xen/irq.h>
96 #include <xen/softirq.h>
97 #include <xen/domain_page.h>
98 #include <xen/event.h>
99 #include <asm/shadow.h>
100 #include <asm/page.h>
101 #include <asm/flushtlb.h>
102 #include <asm/io.h>
103 #include <asm/uaccess.h>
104 #include <asm/ldt.h>
105 #include <asm/x86_emulate.h>
107 #ifdef VERBOSE
108 #define MEM_LOG(_f, _a...) \
109 printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
110 current->domain->domain_id , __LINE__ , ## _a )
111 #else
112 #define MEM_LOG(_f, _a...) ((void)0)
113 #endif
115 /*
116 * Both do_mmuext_op() and do_mmu_update():
117 * We steal the m.s.b. of the @count parameter to indicate whether this
118 * invocation of do_mmu_update() is resuming a previously preempted call.
119 */
120 #define MMU_UPDATE_PREEMPTED (~(~0U>>1))
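Concretely, for a 32-bit count this flag is bit 31; a minimal worked example of the encoding:

/*
 * Worked example (illustration only): MMU_UPDATE_PREEMPTED == 0x80000000U
 * for a 32-bit count, so a call preempted with 10 operations outstanding
 * is resumed with count == (10 | MMU_UPDATE_PREEMPTED); the handlers then
 * recover the real count with count &= ~MMU_UPDATE_PREEMPTED.
 */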
122 static void free_l2_table(struct pfn_info *page);
123 static void free_l1_table(struct pfn_info *page);
125 static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
126 unsigned long type);
127 static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
129 /* Used to defer flushing of memory structures. */
130 static struct {
131 #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */
132 #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
133 #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */
134 unsigned int deferred_ops;
135 /* If non-NULL, specifies a foreign subject domain for some operations. */
136 struct domain *foreign;
137 } __cacheline_aligned percpu_info[NR_CPUS];
139 /*
140 * Returns the current foreign domain; defaults to the currently-executing
141 * domain if a foreign override hasn't been specified.
142 */
143 #define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ?: current->domain)
145 /* Private domain structs for DOMID_XEN and DOMID_IO. */
146 static struct domain *dom_xen, *dom_io;
148 /* Frame table and its size in pages. */
149 struct pfn_info *frame_table;
150 unsigned long max_page;
151 unsigned long total_pages;
153 void __init init_frametable(void)
154 {
155 unsigned long nr_pages, page_step, i, pfn;
157 frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START;
159 nr_pages = PFN_UP(max_page * sizeof(*frame_table));
160 page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
162 for ( i = 0; i < nr_pages; i += page_step )
163 {
164 pfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
165 if ( pfn == 0 )
166 panic("Not enough memory for frame table\n");
167 map_pages_to_xen(
168 FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
169 pfn, page_step, PAGE_HYPERVISOR);
170 }
172 memset(frame_table, 0, nr_pages << PAGE_SHIFT);
173 }
175 void arch_init_memory(void)
176 {
177 extern void subarch_init_memory(struct domain *);
179 unsigned long i, pfn, rstart_pfn, rend_pfn;
180 struct pfn_info *page;
182 memset(percpu_info, 0, sizeof(percpu_info));
184 /*
185 * Initialise our DOMID_XEN domain.
186 * Any Xen-heap pages that we will allow to be mapped will have
187 * their domain field set to dom_xen.
188 */
189 dom_xen = alloc_domain();
190 atomic_set(&dom_xen->refcnt, 1);
191 dom_xen->domain_id = DOMID_XEN;
193 /*
194 * Initialise our DOMID_IO domain.
195 * This domain owns I/O pages that are within the range of the pfn_info
196 * array. Mappings occur at the privilege level of the caller.
197 */
198 dom_io = alloc_domain();
199 atomic_set(&dom_io->refcnt, 1);
200 dom_io->domain_id = DOMID_IO;
202 /* First 1MB of RAM is historically marked as I/O. */
203 for ( i = 0; i < 0x100; i++ )
204 {
205 page = pfn_to_page(i);
206 page->count_info = PGC_allocated | 1;
207 page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
208 page_set_owner(page, dom_io);
209 }
211 /* Any areas not specified as RAM by the e820 map are considered I/O. */
212 for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
213 {
214 if ( e820.map[i].type != E820_RAM )
215 continue;
216 /* Every page from cursor to start of next RAM region is I/O. */
217 rstart_pfn = PFN_UP(e820.map[i].addr);
218 rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
219 for ( ; pfn < rstart_pfn; pfn++ )
220 {
221 BUG_ON(!pfn_valid(pfn));
222 page = pfn_to_page(pfn);
223 page->count_info = PGC_allocated | 1;
224 page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
225 page_set_owner(page, dom_io);
226 }
227 /* Skip the RAM region. */
228 pfn = rend_pfn;
229 }
230 BUG_ON(pfn != max_page);
232 subarch_init_memory(dom_xen);
233 }
235 void write_ptbase(struct vcpu *v)
236 {
237 write_cr3(pagetable_get_paddr(v->arch.monitor_table));
238 }
240 void invalidate_shadow_ldt(struct vcpu *v)
241 {
242 int i;
243 unsigned long pfn;
244 struct pfn_info *page;
246 if ( v->arch.shadow_ldt_mapcnt == 0 )
247 return;
249 v->arch.shadow_ldt_mapcnt = 0;
251 for ( i = 16; i < 32; i++ )
252 {
253 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
254 if ( pfn == 0 ) continue;
255 v->arch.perdomain_ptes[i] = l1e_empty();
256 page = pfn_to_page(pfn);
257 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
258 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
259 put_page_and_type(page);
260 }
262 /* Dispose of the (now possibly invalid) mappings from the TLB. */
263 percpu_info[v->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT;
264 }
267 static int alloc_segdesc_page(struct pfn_info *page)
268 {
269 struct desc_struct *descs;
270 int i;
272 descs = map_domain_page(page_to_pfn(page));
274 for ( i = 0; i < 512; i++ )
275 if ( unlikely(!check_descriptor(&descs[i])) )
276 goto fail;
278 unmap_domain_page(descs);
279 return 1;
281 fail:
282 unmap_domain_page(descs);
283 return 0;
284 }
287 /* Map shadow page at offset @off. */
288 int map_ldt_shadow_page(unsigned int off)
289 {
290 struct vcpu *v = current;
291 struct domain *d = v->domain;
292 unsigned long gpfn, gmfn;
293 l1_pgentry_t l1e, nl1e;
294 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
295 int res;
297 #if defined(__x86_64__)
298 /* If in user mode, switch to kernel mode just to read LDT mapping. */
299 extern void toggle_guest_mode(struct vcpu *);
300 int user_mode = !(v->arch.flags & TF_kernel_mode);
301 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
302 #elif defined(__i386__)
303 #define TOGGLE_MODE() ((void)0)
304 #endif
306 BUG_ON(unlikely(in_irq()));
308 shadow_sync_va(v, gva);
310 TOGGLE_MODE();
311 __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)],
312 sizeof(l1e));
313 TOGGLE_MODE();
315 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
316 return 0;
318 gpfn = l1e_get_pfn(l1e);
319 gmfn = __gpfn_to_mfn(d, gpfn);
320 if ( unlikely(!VALID_MFN(gmfn)) )
321 return 0;
323 res = get_page_and_type(pfn_to_page(gmfn), d, PGT_ldt_page);
325 if ( !res && unlikely(shadow_mode_refcounts(d)) )
326 {
327 shadow_lock(d);
328 shadow_remove_all_write_access(d, gpfn, gmfn);
329 res = get_page_and_type(pfn_to_page(gmfn), d, PGT_ldt_page);
330 shadow_unlock(d);
331 }
333 if ( unlikely(!res) )
334 return 0;
336 nl1e = l1e_from_pfn(gmfn, l1e_get_flags(l1e) | _PAGE_RW);
338 v->arch.perdomain_ptes[off + 16] = nl1e;
339 v->arch.shadow_ldt_mapcnt++;
341 return 1;
342 }
345 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
346 {
347 struct pfn_info *page = pfn_to_page(page_nr);
349 if ( unlikely(!pfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
350 {
351 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
352 return 0;
353 }
355 return 1;
356 }
359 static int get_page_and_type_from_pagenr(unsigned long page_nr,
360 unsigned long type,
361 struct domain *d)
362 {
363 struct pfn_info *page = pfn_to_page(page_nr);
365 if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
366 return 0;
368 if ( unlikely(!get_page_type(page, type)) )
369 {
370 put_page(page);
371 return 0;
372 }
374 return 1;
375 }
377 /*
378 * We allow root tables to map each other (a.k.a. linear page tables). It
379 * needs some special care with reference counts and access permissions:
380 * 1. The mapping entry must be read-only, or the guest may get write access
381 * to its own PTEs.
382 * 2. We must only bump the reference counts for an *already validated*
383 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
384 * on a validation that is required to complete that validation.
385 * 3. We only need to increment the reference counts for the mapped page
386 * frame if it is mapped by a different root table. This is sufficient and
387 * also necessary to allow validation of a root table mapping itself.
388 */
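For concreteness, a brief sketch of how these rules play out in get_linear_pagetable() below:

/*
 * Illustration only: if a guest writes a root table's own mfn into one of
 * that table's slots (read-only, as rule 1 requires), root_get_pfn(re) ==
 * re_pfn and no extra reference is taken; only when the slot points at a
 * *different*, already-validated root table is its type count bumped
 * (rules 2 and 3).
 */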
389 static int
390 get_linear_pagetable(
391 root_pgentry_t re, unsigned long re_pfn, struct domain *d)
392 {
393 unsigned long x, y;
394 struct pfn_info *page;
395 unsigned long pfn;
397 ASSERT( !shadow_mode_refcounts(d) );
399 if ( (root_get_flags(re) & _PAGE_RW) )
400 {
401 MEM_LOG("Attempt to create linear p.t. with write perms");
402 return 0;
403 }
405 if ( (pfn = root_get_pfn(re)) != re_pfn )
406 {
407 /* Make sure the mapped frame belongs to the correct domain. */
408 if ( unlikely(!get_page_from_pagenr(pfn, d)) )
409 return 0;
411 /*
412 * Make sure that the mapped frame is an already-validated L2 table.
413 * If so, atomically increment the count (checking for overflow).
414 */
415 page = pfn_to_page(pfn);
416 y = page->u.inuse.type_info;
417 do {
418 x = y;
419 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
420 unlikely((x & (PGT_type_mask|PGT_validated)) !=
421 (PGT_root_page_table|PGT_validated)) )
422 {
423 put_page(page);
424 return 0;
425 }
426 }
427 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );
428 }
430 return 1;
431 }
433 int
434 get_page_from_l1e(
435 l1_pgentry_t l1e, struct domain *d)
436 {
437 unsigned long mfn = l1e_get_pfn(l1e);
438 struct pfn_info *page = pfn_to_page(mfn);
439 int okay;
440 extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn);
442 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
443 return 1;
445 if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
446 {
447 MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
448 return 0;
449 }
451 if ( unlikely(!pfn_valid(mfn)) ||
452 unlikely(page_get_owner(page) == dom_io) )
453 {
454 /* DOMID_IO reverts to caller for privilege checks. */
455 if ( d == dom_io )
456 d = current->domain;
458 if ( (!IS_PRIV(d)) &&
459 (!IS_CAPABLE_PHYSDEV(d) || !domain_iomem_in_pfn(d, mfn)) )
460 {
461 MEM_LOG("Non-privileged attempt to map I/O space %08lx", mfn);
462 return 0;
463 }
465 /* No reference counting for out-of-range I/O pages. */
466 if ( !pfn_valid(mfn) )
467 return 1;
469 d = dom_io;
470 }
472 okay = ((l1e_get_flags(l1e) & _PAGE_RW) ?
473 get_page_and_type(page, d, PGT_writable_page) :
474 get_page(page, d));
475 if ( !okay )
476 {
477 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
478 " for dom%d",
479 mfn, get_pfn_from_mfn(mfn), l1e_get_intpte(l1e), d->domain_id);
480 }
482 return okay;
483 }
486 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
487 static int
488 get_page_from_l2e(
489 l2_pgentry_t l2e, unsigned long pfn,
490 struct domain *d, unsigned long vaddr)
491 {
492 int rc;
494 ASSERT(!shadow_mode_refcounts(d));
496 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
497 return 1;
499 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
500 {
501 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
502 return 0;
503 }
505 vaddr >>= L2_PAGETABLE_SHIFT;
506 vaddr <<= PGT_va_shift;
507 rc = get_page_and_type_from_pagenr(
508 l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
510 #if CONFIG_PAGING_LEVELS == 2
511 if ( unlikely(!rc) )
512 rc = get_linear_pagetable(l2e, pfn, d);
513 #endif
514 return rc;
515 }
518 #if CONFIG_PAGING_LEVELS >= 3
520 static int
521 get_page_from_l3e(
522 l3_pgentry_t l3e, unsigned long pfn,
523 struct domain *d, unsigned long vaddr)
524 {
525 int rc;
527 ASSERT(!shadow_mode_refcounts(d));
529 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
530 return 1;
532 if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
533 {
534 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
535 return 0;
536 }
538 vaddr >>= L3_PAGETABLE_SHIFT;
539 vaddr <<= PGT_va_shift;
540 rc = get_page_and_type_from_pagenr(
541 l3e_get_pfn(l3e),
542 PGT_l2_page_table | vaddr, d);
543 #if CONFIG_PAGING_LEVELS == 3
544 if ( unlikely(!rc) )
545 rc = get_linear_pagetable(l3e, pfn, d);
546 #endif
547 return rc;
548 }
550 #endif /* 3 level */
552 #if CONFIG_PAGING_LEVELS >= 4
554 static int
555 get_page_from_l4e(
556 l4_pgentry_t l4e, unsigned long pfn,
557 struct domain *d, unsigned long vaddr)
558 {
559 int rc;
561 ASSERT( !shadow_mode_refcounts(d) );
563 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
564 return 1;
566 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
567 {
568 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
569 return 0;
570 }
572 vaddr >>= L4_PAGETABLE_SHIFT;
573 vaddr <<= PGT_va_shift;
574 rc = get_page_and_type_from_pagenr(
575 l4e_get_pfn(l4e),
576 PGT_l3_page_table | vaddr, d);
578 if ( unlikely(!rc) )
579 rc = get_linear_pagetable(l4e, pfn, d);
581 return rc;
582 }
584 #endif /* 4 level */
587 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
588 {
589 unsigned long pfn = l1e_get_pfn(l1e);
590 struct pfn_info *page = pfn_to_page(pfn);
591 struct domain *e;
592 struct vcpu *v;
594 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !pfn_valid(pfn) )
595 return;
597 e = page_get_owner(page);
599 /*
600 * Check if this is a mapping that was established via a grant reference.
601 * If it was then we should not be here: we require that such mappings are
602 * explicitly destroyed via the grant-table interface.
603 *
604 * The upshot of this is that the guest can end up with active grants that
605 * it cannot destroy (because it no longer has a PTE to present to the
606 * grant-table interface). This can lead to subtle hard-to-catch bugs,
607 * hence a special grant PTE flag can be enabled to catch the bug early.
608 *
609 * (Note that the undestroyable active grants are not a security hole in
610 * Xen. All active grants can safely be cleaned up when the domain dies.)
611 */
612 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
613 !(d->domain_flags & (DOMF_shutdown|DOMF_dying)) )
614 {
615 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
616 l1e_get_intpte(l1e));
617 domain_crash(d);
618 }
620 if ( l1e_get_flags(l1e) & _PAGE_RW )
621 {
622 put_page_and_type(page);
623 }
624 else
625 {
626 /* We expect this is rare so we blow the entire shadow LDT. */
627 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
628 PGT_ldt_page)) &&
629 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
630 (d == e) )
631 {
632 for_each_vcpu ( d, v )
633 invalidate_shadow_ldt(v);
634 }
635 put_page(page);
636 }
637 }
640 /*
641 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
642 * Note also that this automatically deals correctly with linear p.t.'s.
643 */
644 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
645 {
646 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
647 (l2e_get_pfn(l2e) != pfn) )
648 put_page_and_type(pfn_to_page(l2e_get_pfn(l2e)));
649 }
652 #if CONFIG_PAGING_LEVELS >= 3
654 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
655 {
656 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
657 (l3e_get_pfn(l3e) != pfn) )
658 put_page_and_type(pfn_to_page(l3e_get_pfn(l3e)));
659 }
661 #endif
663 #if CONFIG_PAGING_LEVELS >= 4
665 static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
666 {
667 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
668 (l4e_get_pfn(l4e) != pfn) )
669 put_page_and_type(pfn_to_page(l4e_get_pfn(l4e)));
670 }
672 #endif
675 static int alloc_l1_table(struct pfn_info *page)
676 {
677 struct domain *d = page_get_owner(page);
678 unsigned long pfn = page_to_pfn(page);
679 l1_pgentry_t *pl1e;
680 int i;
682 ASSERT(!shadow_mode_refcounts(d));
684 pl1e = map_domain_page(pfn);
686 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
687 if ( is_guest_l1_slot(i) &&
688 unlikely(!get_page_from_l1e(pl1e[i], d)) )
689 goto fail;
691 unmap_domain_page(pl1e);
692 return 1;
694 fail:
695 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
696 while ( i-- > 0 )
697 if ( is_guest_l1_slot(i) )
698 put_page_from_l1e(pl1e[i], d);
700 unmap_domain_page(pl1e);
701 return 0;
702 }
704 #ifdef CONFIG_X86_PAE
705 static int create_pae_xen_mappings(l3_pgentry_t *pl3e)
706 {
707 struct pfn_info *page;
708 l2_pgentry_t *pl2e;
709 l3_pgentry_t l3e3;
710 int i;
712 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
714 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
715 l3e3 = pl3e[3];
716 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
717 {
718 MEM_LOG("PAE L3 3rd slot is empty");
719 return 0;
720 }
722 /*
723 * The Xen-private mappings include linear mappings. The L2 thus cannot
724 * be shared by multiple L3 tables. The test here is adequate because:
725 * 1. Cannot appear in slots != 3 because the page would then have an
726 * unknown va backpointer, which get_page_type() explicitly disallows.
727 * 2. Cannot appear in another page table's L3:
728 * a. alloc_l3_table() calls this function and this check will fail
729 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
730 */
731 page = l3e_get_page(l3e3);
732 BUG_ON(page->u.inuse.type_info & PGT_pinned);
733 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
734 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
735 {
736 MEM_LOG("PAE L3 3rd slot is shared");
737 return 0;
738 }
740 /* Xen private mappings. */
741 pl2e = map_domain_page(l3e_get_pfn(l3e3));
742 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
743 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
744 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
745 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
746 pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
747 l2e_from_page(
748 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
749 __PAGE_HYPERVISOR);
750 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
751 pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
752 (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ?
753 l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR) :
754 l2e_empty();
755 unmap_domain_page(pl2e);
757 return 1;
758 }
760 static inline int l1_backptr(
761 unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
762 {
763 unsigned long l2_backptr = l2_type & PGT_va_mask;
764 BUG_ON(l2_backptr == PGT_va_unknown);
765 if ( l2_backptr == PGT_va_mutable )
766 return 0;
767 *backptr =
768 ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) |
769 (offset_in_l2 << L2_PAGETABLE_SHIFT);
770 return 1;
771 }
773 #elif CONFIG_X86_64
774 # define create_pae_xen_mappings(pl3e) (1)
776 static inline int l1_backptr(
777 unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type)
778 {
779 unsigned long l2_backptr = l2_type & PGT_va_mask;
780 BUG_ON(l2_backptr == PGT_va_unknown);
782 *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) |
783 (offset_in_l2 << L2_PAGETABLE_SHIFT);
784 return 1;
785 }
787 static inline int l2_backptr(
788 unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type)
789 {
790 unsigned long l3_backptr = l3_type & PGT_va_mask;
791 BUG_ON(l3_backptr == PGT_va_unknown);
793 *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) |
794 (offset_in_l3 << L3_PAGETABLE_SHIFT);
795 return 1;
796 }
798 static inline int l3_backptr(
799 unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type)
800 {
801 unsigned long l4_backptr = l4_type & PGT_va_mask;
802 BUG_ON(l4_backptr == PGT_va_unknown);
804 *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT);
805 return 1;
806 }
807 #else
808 # define create_pae_xen_mappings(pl3e) (1)
809 # define l1_backptr(bp,l2o,l2t) \
810 ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; })
811 #endif
813 static int alloc_l2_table(struct pfn_info *page, unsigned long type)
814 {
815 struct domain *d = page_get_owner(page);
816 unsigned long pfn = page_to_pfn(page);
817 unsigned long vaddr;
818 l2_pgentry_t *pl2e;
819 int i;
821 /* See the code in shadow_promote() to understand why this is here. */
822 if ( (PGT_base_page_table == PGT_l2_page_table) &&
823 unlikely(shadow_mode_refcounts(d)) )
824 return 1;
825 ASSERT(!shadow_mode_refcounts(d));
827 pl2e = map_domain_page(pfn);
829 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
830 {
831 if ( !l1_backptr(&vaddr, i, type) )
832 goto fail;
833 if ( is_guest_l2_slot(type, i) &&
834 unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
835 goto fail;
836 }
838 #if CONFIG_PAGING_LEVELS == 2
839 /* Xen private mappings. */
840 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
841 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
842 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
843 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
844 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
845 pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
846 l2e_from_page(
847 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt),
848 __PAGE_HYPERVISOR);
849 #endif
851 unmap_domain_page(pl2e);
852 return 1;
854 fail:
855 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
856 while ( i-- > 0 )
857 if ( is_guest_l2_slot(type, i) )
858 put_page_from_l2e(pl2e[i], pfn);
860 unmap_domain_page(pl2e);
861 return 0;
862 }
865 #if CONFIG_PAGING_LEVELS >= 3
866 static int alloc_l3_table(struct pfn_info *page, unsigned long type)
867 {
868 struct domain *d = page_get_owner(page);
869 unsigned long pfn = page_to_pfn(page);
870 unsigned long vaddr;
871 l3_pgentry_t *pl3e;
872 int i;
874 ASSERT(!shadow_mode_refcounts(d));
876 #ifdef CONFIG_X86_PAE
877 if ( pfn >= 0x100000 )
878 {
879 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
880 return 0;
881 }
882 #endif
884 pl3e = map_domain_page(pfn);
885 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
886 {
887 #if CONFIG_PAGING_LEVELS >= 4
888 if ( !l2_backptr(&vaddr, i, type) )
889 goto fail;
890 #else
891 vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT;
892 #endif
893 if ( is_guest_l3_slot(i) &&
894 unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
895 goto fail;
896 }
898 if ( !create_pae_xen_mappings(pl3e) )
899 goto fail;
901 unmap_domain_page(pl3e);
902 return 1;
904 fail:
905 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
906 while ( i-- > 0 )
907 if ( is_guest_l3_slot(i) )
908 put_page_from_l3e(pl3e[i], pfn);
910 unmap_domain_page(pl3e);
911 return 0;
912 }
913 #else
914 #define alloc_l3_table(page, type) (0)
915 #endif
917 #if CONFIG_PAGING_LEVELS >= 4
918 static int alloc_l4_table(struct pfn_info *page, unsigned long type)
919 {
920 struct domain *d = page_get_owner(page);
921 unsigned long pfn = page_to_pfn(page);
922 l4_pgentry_t *pl4e = page_to_virt(page);
923 unsigned long vaddr;
924 int i;
926 /* See the code in shadow_promote() to understand why this is here. */
927 if ( (PGT_base_page_table == PGT_l4_page_table) &&
928 shadow_mode_refcounts(d) )
929 return 1;
930 ASSERT(!shadow_mode_refcounts(d));
932 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
933 {
934 if ( !l3_backptr(&vaddr, i, type) )
935 goto fail;
937 if ( is_guest_l4_slot(i) &&
938 unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) )
939 goto fail;
940 }
942 /* Xen private mappings. */
943 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
944 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
945 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
946 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
947 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
948 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
949 l4e_from_page(
950 virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3),
951 __PAGE_HYPERVISOR);
953 return 1;
955 fail:
956 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
957 while ( i-- > 0 )
958 if ( is_guest_l4_slot(i) )
959 put_page_from_l4e(pl4e[i], pfn);
961 return 0;
962 }
963 #else
964 #define alloc_l4_table(page, type) (0)
965 #endif
968 static void free_l1_table(struct pfn_info *page)
969 {
970 struct domain *d = page_get_owner(page);
971 unsigned long pfn = page_to_pfn(page);
972 l1_pgentry_t *pl1e;
973 int i;
975 pl1e = map_domain_page(pfn);
977 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
978 if ( is_guest_l1_slot(i) )
979 put_page_from_l1e(pl1e[i], d);
981 unmap_domain_page(pl1e);
982 }
985 static void free_l2_table(struct pfn_info *page)
986 {
987 unsigned long pfn = page_to_pfn(page);
988 l2_pgentry_t *pl2e;
989 int i;
991 pl2e = map_domain_page(pfn);
993 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
994 if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
995 put_page_from_l2e(pl2e[i], pfn);
997 unmap_domain_page(pl2e);
998 }
1001 #if CONFIG_PAGING_LEVELS >= 3
1003 static void free_l3_table(struct pfn_info *page)
1004 {
1005 unsigned long pfn = page_to_pfn(page);
1006 l3_pgentry_t *pl3e;
1007 int i;
1009 pl3e = map_domain_page(pfn);
1011 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1012 if ( is_guest_l3_slot(i) )
1013 put_page_from_l3e(pl3e[i], pfn);
1015 unmap_domain_page(pl3e);
1016 }
1018 #endif
1020 #if CONFIG_PAGING_LEVELS >= 4
1022 static void free_l4_table(struct pfn_info *page)
1023 {
1024 unsigned long pfn = page_to_pfn(page);
1025 l4_pgentry_t *pl4e = page_to_virt(page);
1026 int i;
1028 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1029 if ( is_guest_l4_slot(i) )
1030 put_page_from_l4e(pl4e[i], pfn);
1031 }
1033 #endif
1035 static inline int update_l1e(l1_pgentry_t *pl1e,
1036 l1_pgentry_t ol1e,
1037 l1_pgentry_t nl1e)
1038 {
1039 intpte_t o = l1e_get_intpte(ol1e);
1040 intpte_t n = l1e_get_intpte(nl1e);
1042 if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
1043 unlikely(o != l1e_get_intpte(ol1e)) )
1044 {
1045 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
1046 ": saw %" PRIpte,
1047 l1e_get_intpte(ol1e),
1048 l1e_get_intpte(nl1e),
1049 o);
1050 return 0;
1051 }
1052 return 1;
1053 }
1056 /* Update the L1 entry at pl1e to new value nl1e. */
1057 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e)
1058 {
1059 l1_pgentry_t ol1e;
1060 struct domain *d = current->domain;
1062 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
1063 return 0;
1065 if ( unlikely(shadow_mode_refcounts(d)) )
1066 return update_l1e(pl1e, ol1e, nl1e);
1068 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
1069 {
1070 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
1071 {
1072 MEM_LOG("Bad L1 flags %x",
1073 l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
1074 return 0;
1075 }
1077 /* Fast path for identical mapping, r/w and presence. */
1078 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT))
1079 return update_l1e(pl1e, ol1e, nl1e);
1081 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
1082 return 0;
1084 if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
1085 {
1086 put_page_from_l1e(nl1e, d);
1087 return 0;
1088 }
1089 }
1090 else
1091 {
1092 if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
1093 return 0;
1094 }
1096 put_page_from_l1e(ol1e, d);
1097 return 1;
1098 }
1100 #define UPDATE_ENTRY(_t,_p,_o,_n) ({ \
1101 intpte_t __o = cmpxchg((intpte_t *)(_p), \
1102 _t ## e_get_intpte(_o), \
1103 _t ## e_get_intpte(_n)); \
1104 if ( __o != _t ## e_get_intpte(_o) ) \
1105 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte \
1106 ": saw %" PRIpte "", \
1107 (_t ## e_get_intpte(_o)), \
1108 (_t ## e_get_intpte(_n)), \
1109 (__o)); \
1110 (__o == _t ## e_get_intpte(_o)); })
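A sketch of one expansion, assuming the l2 instantiation (the __o temporary and the MEM_LOG are elided):

/*
 * Illustration only: UPDATE_ENTRY(l2, pl2e, ol2e, nl2e) boils down to
 *     cmpxchg((intpte_t *)pl2e, l2e_get_intpte(ol2e), l2e_get_intpte(nl2e))
 * and the expression is true only if the slot still held ol2e, i.e. no
 * other CPU updated the entry concurrently.
 */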
1112 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
1113 static int mod_l2_entry(l2_pgentry_t *pl2e,
1114 l2_pgentry_t nl2e,
1115 unsigned long pfn,
1116 unsigned long type)
1117 {
1118 l2_pgentry_t ol2e;
1119 unsigned long vaddr = 0;
1121 if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
1122 {
1123 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
1124 return 0;
1125 }
1127 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
1128 return 0;
1130 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
1131 {
1132 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
1133 {
1134 MEM_LOG("Bad L2 flags %x",
1135 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
1136 return 0;
1137 }
1139 /* Fast path for identical mapping and presence. */
1140 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
1141 return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
1143 if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) ||
1144 unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
1145 return 0;
1147 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
1148 {
1149 put_page_from_l2e(nl2e, pfn);
1150 return 0;
1151 }
1152 }
1153 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
1154 {
1155 return 0;
1156 }
1158 put_page_from_l2e(ol2e, pfn);
1159 return 1;
1160 }
1163 #if CONFIG_PAGING_LEVELS >= 3
1165 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
1166 static int mod_l3_entry(l3_pgentry_t *pl3e,
1167 l3_pgentry_t nl3e,
1168 unsigned long pfn,
1169 unsigned long type)
1170 {
1171 l3_pgentry_t ol3e;
1172 unsigned long vaddr;
1173 int okay;
1175 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
1176 {
1177 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
1178 return 0;
1179 }
1181 #ifdef CONFIG_X86_PAE
1182 /*
1183 * Disallow updates to final L3 slot. It contains Xen mappings, and it
1184 * would be a pain to ensure they remain continuously valid throughout.
1185 */
1186 if ( pgentry_ptr_to_slot(pl3e) >= 3 )
1187 return 0;
1188 #endif
1190 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
1191 return 0;
1193 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
1194 {
1195 if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
1196 {
1197 MEM_LOG("Bad L3 flags %x",
1198 l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
1199 return 0;
1200 }
1202 /* Fast path for identical mapping and presence. */
1203 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
1204 return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
1206 #if CONFIG_PAGING_LEVELS >= 4
1207 if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) ||
1208 unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
1209 return 0;
1210 #else
1211 vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
1212 << L3_PAGETABLE_SHIFT;
1213 if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
1214 return 0;
1215 #endif
1217 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
1218 {
1219 put_page_from_l3e(nl3e, pfn);
1220 return 0;
1221 }
1222 }
1223 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
1224 {
1225 return 0;
1226 }
1228 okay = create_pae_xen_mappings(pl3e);
1229 BUG_ON(!okay);
1231 put_page_from_l3e(ol3e, pfn);
1232 return 1;
1233 }
1235 #endif
1237 #if CONFIG_PAGING_LEVELS >= 4
1239 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
1240 static int mod_l4_entry(l4_pgentry_t *pl4e,
1241 l4_pgentry_t nl4e,
1242 unsigned long pfn,
1243 unsigned long type)
1244 {
1245 l4_pgentry_t ol4e;
1246 unsigned long vaddr;
1248 if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
1249 {
1250 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
1251 return 0;
1252 }
1254 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
1255 return 0;
1257 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
1258 {
1259 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
1260 {
1261 MEM_LOG("Bad L4 flags %x",
1262 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
1263 return 0;
1264 }
1266 /* Fast path for identical mapping and presence. */
1267 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
1268 return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e);
1270 if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) ||
1271 unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) )
1272 return 0;
1274 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
1275 {
1276 put_page_from_l4e(nl4e, pfn);
1277 return 0;
1278 }
1279 }
1280 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e)) )
1281 {
1282 return 0;
1283 }
1285 put_page_from_l4e(ol4e, pfn);
1286 return 1;
1287 }
1289 #endif
1291 int alloc_page_type(struct pfn_info *page, unsigned long type)
1292 {
1293 struct domain *owner = page_get_owner(page);
1295 if ( owner != NULL )
1296 mark_dirty(owner, page_to_pfn(page));
1298 switch ( type & PGT_type_mask )
1299 {
1300 case PGT_l1_page_table:
1301 return alloc_l1_table(page);
1302 case PGT_l2_page_table:
1303 return alloc_l2_table(page, type);
1304 case PGT_l3_page_table:
1305 return alloc_l3_table(page, type);
1306 case PGT_l4_page_table:
1307 return alloc_l4_table(page, type);
1308 case PGT_gdt_page:
1309 case PGT_ldt_page:
1310 return alloc_segdesc_page(page);
1311 default:
1312 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
1313 type, page->u.inuse.type_info,
1314 page->count_info);
1315 BUG();
1316 }
1318 return 0;
1319 }
1322 void free_page_type(struct pfn_info *page, unsigned long type)
1324 struct domain *owner = page_get_owner(page);
1325 unsigned long gpfn;
1327 if ( likely(owner != NULL) )
1329 /*
1330 * We have to flush before the next use of the linear mapping
1331 * (e.g., update_va_mapping()) or we could end up modifying a page
1332 * that is no longer a page table (and hence screw up ref counts).
1333 */
1334 percpu_info[smp_processor_id()].deferred_ops |= DOP_FLUSH_ALL_TLBS;
1336 if ( unlikely(shadow_mode_enabled(owner)) )
1338 /* Raw page tables are rewritten during save/restore. */
1339 if ( !shadow_mode_translate(owner) )
1340 mark_dirty(owner, page_to_pfn(page));
1342 if ( shadow_mode_refcounts(owner) )
1343 return;
1345 gpfn = __mfn_to_gpfn(owner, page_to_pfn(page));
1346 ASSERT(VALID_M2P(gpfn));
1347 remove_shadow(owner, gpfn, type & PGT_type_mask);
1351 switch ( type & PGT_type_mask )
1353 case PGT_l1_page_table:
1354 free_l1_table(page);
1355 break;
1357 case PGT_l2_page_table:
1358 free_l2_table(page);
1359 break;
1361 #if CONFIG_PAGING_LEVELS >= 3
1362 case PGT_l3_page_table:
1363 free_l3_table(page);
1364 break;
1365 #endif
1367 #if CONFIG_PAGING_LEVELS >= 4
1368 case PGT_l4_page_table:
1369 free_l4_table(page);
1370 break;
1371 #endif
1373 default:
1374 printk("%s: type %lx pfn %lx\n",__FUNCTION__,
1375 type, page_to_pfn(page));
1376 BUG();
1381 void put_page_type(struct pfn_info *page)
1383 unsigned long nx, x, y = page->u.inuse.type_info;
1385 again:
1386 do {
1387 x = y;
1388 nx = x - 1;
1390 ASSERT((x & PGT_count_mask) != 0);
1392 /*
1393 * The page should always be validated while a reference is held. The
1394 * exception is during domain destruction, when we forcibly invalidate
1395 * page-table pages if we detect a referential loop.
1396 * See domain.c:relinquish_list().
1397 */
1398 ASSERT((x & PGT_validated) ||
1399 test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
1401 if ( unlikely((nx & PGT_count_mask) == 0) )
1403 /* Record TLB information for flush later. Races are harmless. */
1404 page->tlbflush_timestamp = tlbflush_current_time();
1406 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1407 likely(nx & PGT_validated) )
1409 /*
1410 * Page-table pages must be unvalidated when count is zero. The
1411 * 'free' is safe because the refcnt is non-zero and validated
1412 * bit is clear => other ops will spin or fail.
1413 */
1414 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1415 x & ~PGT_validated)) != x) )
1416 goto again;
1417 /* We cleared the 'valid bit' so we do the clean up. */
1418 free_page_type(page, x);
1419 /* Carry on, but with the 'valid bit' now clear. */
1420 x &= ~PGT_validated;
1421 nx &= ~PGT_validated;
1424 else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) ==
1425 (PGT_pinned | 1)) &&
1426 ((nx & PGT_type_mask) != PGT_writable_page)) )
1428 /* Page is now only pinned. Make the back pointer mutable again. */
1429 nx |= PGT_va_mutable;
1432 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1436 int get_page_type(struct pfn_info *page, unsigned long type)
1438 unsigned long nx, x, y = page->u.inuse.type_info;
1440 again:
1441 do {
1442 x = y;
1443 nx = x + 1;
1444 if ( unlikely((nx & PGT_count_mask) == 0) )
1446 MEM_LOG("Type count overflow on pfn %lx", page_to_pfn(page));
1447 return 0;
1449 else if ( unlikely((x & PGT_count_mask) == 0) )
1451 if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
1453 if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
1455 /*
1456 * On type change we check to flush stale TLB
1457 * entries. This may be unnecessary (e.g., page
1458 * was GDT/LDT) but those circumstances should be
1459 * very rare.
1460 */
1461 cpumask_t mask = page_get_owner(page)->cpumask;
1462 tlbflush_filter(mask, page->tlbflush_timestamp);
1464 if ( unlikely(!cpus_empty(mask)) )
1466 perfc_incrc(need_flush_tlb_flush);
1467 flush_tlb_mask(mask);
1471 /* We lose existing type, back pointer, and validity. */
1472 nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
1473 nx |= type;
1475 /* No special validation needed for writable pages. */
1476 /* Page tables and GDT/LDT need to be scanned for validity. */
1477 if ( type == PGT_writable_page )
1478 nx |= PGT_validated;
1481 else
1483 if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
1485 if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
1487 if ( current->domain == page_get_owner(page) )
1489 /*
1490 * This ensures functions like set_gdt() see up-to-date
1491 * type info without needing to clean up writable p.t.
1492 * state on the fast path.
1493 */
1494 LOCK_BIGLOCK(current->domain);
1495 cleanup_writable_pagetable(current->domain);
1496 y = page->u.inuse.type_info;
1497 UNLOCK_BIGLOCK(current->domain);
1498 /* Can we make progress now? */
1499 if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
1500 ((y & PGT_count_mask) == 0) )
1501 goto again;
1503 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1504 ((type & PGT_type_mask) != PGT_l1_page_table) )
1505 MEM_LOG("Bad type (saw %" PRtype_info
1506 " != exp %" PRtype_info ") "
1507 "for mfn %lx (pfn %lx)",
1508 x, type, page_to_pfn(page),
1509 get_pfn_from_mfn(page_to_pfn(page)));
1510 return 0;
1512 else if ( (x & PGT_va_mask) == PGT_va_mutable )
1514 /* The va backpointer is mutable, hence we update it. */
1515 nx &= ~PGT_va_mask;
1516 nx |= type; /* we know the actual type is correct */
1518 else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
1519 ((type & PGT_va_mask) != (x & PGT_va_mask)) )
1521 #ifdef CONFIG_X86_PAE
1522 /* We use backptr as extra typing. Cannot be unknown. */
1523 if ( (type & PGT_type_mask) == PGT_l2_page_table )
1524 return 0;
1525 #endif
1526 /* This table is possibly mapped at multiple locations. */
1527 nx &= ~PGT_va_mask;
1528 nx |= PGT_va_unknown;
1531 if ( unlikely(!(x & PGT_validated)) )
1533 /* Someone else is updating validation of this page. Wait... */
1534 while ( (y = page->u.inuse.type_info) == x )
1535 cpu_relax();
1536 goto again;
1540 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1542 if ( unlikely(!(nx & PGT_validated)) )
1544 /* Try to validate page type; drop the new reference on failure. */
1545 if ( unlikely(!alloc_page_type(page, type)) )
1547 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
1548 PRtype_info ": caf=%08x taf=%" PRtype_info,
1549 page_to_pfn(page), get_pfn_from_mfn(page_to_pfn(page)),
1550 type, page->count_info, page->u.inuse.type_info);
1551 /* No one else can get a reference. We hold the only ref. */
1552 page->u.inuse.type_info = 0;
1553 return 0;
1556 /* No one else is updating simultaneously. */
1557 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1560 return 1;
1564 int new_guest_cr3(unsigned long mfn)
1565 {
1566 struct vcpu *v = current;
1567 struct domain *d = v->domain;
1568 int okay;
1569 unsigned long old_base_mfn;
1571 if ( shadow_mode_refcounts(d) )
1572 okay = get_page_from_pagenr(mfn, d);
1573 else
1574 okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
1576 if ( likely(okay) )
1577 {
1578 invalidate_shadow_ldt(v);
1580 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1581 v->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
1582 update_pagetables(v); /* update shadow_table and monitor_table */
1584 write_ptbase(v);
1586 if ( shadow_mode_refcounts(d) )
1587 put_page(pfn_to_page(old_base_mfn));
1588 else
1589 put_page_and_type(pfn_to_page(old_base_mfn));
1591 /* CR3 also holds a ref to its shadow... */
1592 if ( shadow_mode_enabled(d) )
1593 {
1594 if ( v->arch.monitor_shadow_ref )
1595 put_shadow_ref(v->arch.monitor_shadow_ref);
1596 v->arch.monitor_shadow_ref =
1597 pagetable_get_pfn(v->arch.monitor_table);
1598 ASSERT(!page_get_owner(pfn_to_page(v->arch.monitor_shadow_ref)));
1599 get_shadow_ref(v->arch.monitor_shadow_ref);
1600 }
1601 }
1602 else
1603 {
1604 MEM_LOG("Error while installing new baseptr %lx", mfn);
1605 }
1607 return okay;
1608 }
1610 static void process_deferred_ops(unsigned int cpu)
1611 {
1612 unsigned int deferred_ops;
1613 struct domain *d = current->domain;
1615 deferred_ops = percpu_info[cpu].deferred_ops;
1616 percpu_info[cpu].deferred_ops = 0;
1618 if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
1619 {
1620 if ( shadow_mode_enabled(d) )
1621 shadow_sync_all(d);
1622 if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
1623 flush_tlb_mask(d->cpumask);
1624 else
1625 local_flush_tlb();
1626 }
1628 if ( deferred_ops & DOP_RELOAD_LDT )
1629 (void)map_ldt_shadow_page(0);
1631 if ( unlikely(percpu_info[cpu].foreign != NULL) )
1632 {
1633 put_domain(percpu_info[cpu].foreign);
1634 percpu_info[cpu].foreign = NULL;
1635 }
1636 }
1638 static int set_foreigndom(unsigned int cpu, domid_t domid)
1640 struct domain *e, *d = current->domain;
1641 int okay = 1;
1643 if ( (e = percpu_info[cpu].foreign) != NULL )
1644 put_domain(e);
1645 percpu_info[cpu].foreign = NULL;
1647 if ( domid == DOMID_SELF )
1648 goto out;
1650 if ( !IS_PRIV(d) )
1652 switch ( domid )
1654 case DOMID_IO:
1655 get_knownalive_domain(dom_io);
1656 percpu_info[cpu].foreign = dom_io;
1657 break;
1658 default:
1659 MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
1660 okay = 0;
1661 break;
1664 else
1666 percpu_info[cpu].foreign = e = find_domain_by_id(domid);
1667 if ( e == NULL )
1669 switch ( domid )
1671 case DOMID_XEN:
1672 get_knownalive_domain(dom_xen);
1673 percpu_info[cpu].foreign = dom_xen;
1674 break;
1675 case DOMID_IO:
1676 get_knownalive_domain(dom_io);
1677 percpu_info[cpu].foreign = dom_io;
1678 break;
1679 default:
1680 MEM_LOG("Unknown domain '%u'", domid);
1681 okay = 0;
1682 break;
1687 out:
1688 return okay;
1691 static inline cpumask_t vcpumask_to_pcpumask(
1692 struct domain *d, unsigned long vmask)
1693 {
1694 unsigned int vcpu_id;
1695 cpumask_t pmask;
1696 struct vcpu *v;
1698 while ( vmask != 0 )
1699 {
1700 vcpu_id = find_first_set_bit(vmask);
1701 vmask &= ~(1UL << vcpu_id);
1702 if ( (vcpu_id < MAX_VIRT_CPUS) &&
1703 ((v = d->vcpu[vcpu_id]) != NULL) )
1704 cpu_set(v->processor, pmask);
1705 }
1707 return pmask;
1708 }
1710 int do_mmuext_op(
1711 struct mmuext_op *uops,
1712 unsigned int count,
1713 unsigned int *pdone,
1714 unsigned int foreigndom)
1716 struct mmuext_op op;
1717 int rc = 0, i = 0, okay, cpu = smp_processor_id();
1718 unsigned long mfn, type, done = 0;
1719 struct pfn_info *page;
1720 struct vcpu *v = current;
1721 struct domain *d = v->domain;
1723 LOCK_BIGLOCK(d);
1725 cleanup_writable_pagetable(d);
1727 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1729 count &= ~MMU_UPDATE_PREEMPTED;
1730 if ( unlikely(pdone != NULL) )
1731 (void)get_user(done, pdone);
1734 if ( !set_foreigndom(cpu, foreigndom) )
1736 rc = -EINVAL;
1737 goto out;
1740 if ( unlikely(!array_access_ok(uops, count, sizeof(op))) )
1742 rc = -EFAULT;
1743 goto out;
1746 for ( i = 0; i < count; i++ )
1748 if ( hypercall_preempt_check() )
1750 rc = hypercall4_create_continuation(
1751 __HYPERVISOR_mmuext_op, uops,
1752 (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
1753 break;
1756 if ( unlikely(__copy_from_user(&op, uops, sizeof(op)) != 0) )
1758 MEM_LOG("Bad __copy_from_user");
1759 rc = -EFAULT;
1760 break;
1763 okay = 1;
1764 mfn = op.arg1.mfn;
1765 page = pfn_to_page(mfn);
1767 switch ( op.cmd )
1769 case MMUEXT_PIN_L1_TABLE:
1770 type = PGT_l1_page_table | PGT_va_mutable;
1772 pin_page:
1773 if ( shadow_mode_refcounts(FOREIGNDOM) )
1774 type = PGT_writable_page;
1776 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
1777 if ( unlikely(!okay) )
1779 MEM_LOG("Error while pinning mfn %lx", mfn);
1780 break;
1783 if ( unlikely(test_and_set_bit(_PGT_pinned,
1784 &page->u.inuse.type_info)) )
1786 MEM_LOG("Mfn %lx already pinned", mfn);
1787 put_page_and_type(page);
1788 okay = 0;
1789 break;
1792 break;
1794 #ifndef CONFIG_X86_PAE /* Unsafe on PAE because of Xen-private mappings. */
1795 case MMUEXT_PIN_L2_TABLE:
1796 type = PGT_l2_page_table | PGT_va_mutable;
1797 goto pin_page;
1798 #endif
1800 case MMUEXT_PIN_L3_TABLE:
1801 type = PGT_l3_page_table | PGT_va_mutable;
1802 goto pin_page;
1804 case MMUEXT_PIN_L4_TABLE:
1805 type = PGT_l4_page_table | PGT_va_mutable;
1806 goto pin_page;
1808 case MMUEXT_UNPIN_TABLE:
1809 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
1811 MEM_LOG("Mfn %lx bad domain (dom=%p)",
1812 mfn, page_get_owner(page));
1814 else if ( likely(test_and_clear_bit(_PGT_pinned,
1815 &page->u.inuse.type_info)) )
1817 put_page_and_type(page);
1818 put_page(page);
1820 else
1822 okay = 0;
1823 put_page(page);
1824 MEM_LOG("Mfn %lx not pinned", mfn);
1826 break;
1828 case MMUEXT_NEW_BASEPTR:
1829 okay = new_guest_cr3(mfn);
1830 percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
1831 break;
1833 #ifdef __x86_64__
1834 case MMUEXT_NEW_USER_BASEPTR:
1835 okay = get_page_and_type_from_pagenr(
1836 mfn, PGT_root_page_table, d);
1837 if ( unlikely(!okay) )
1839 MEM_LOG("Error while installing new mfn %lx", mfn);
1841 else
1843 unsigned long old_mfn =
1844 pagetable_get_pfn(v->arch.guest_table_user);
1845 v->arch.guest_table_user = mk_pagetable(mfn << PAGE_SHIFT);
1846 if ( old_mfn != 0 )
1847 put_page_and_type(pfn_to_page(old_mfn));
1849 break;
1850 #endif
1852 case MMUEXT_TLB_FLUSH_LOCAL:
1853 percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
1854 break;
1856 case MMUEXT_INVLPG_LOCAL:
1857 if ( shadow_mode_enabled(d) )
1858 shadow_invlpg(v, op.arg1.linear_addr);
1859 local_flush_tlb_one(op.arg1.linear_addr);
1860 break;
1862 case MMUEXT_TLB_FLUSH_MULTI:
1863 case MMUEXT_INVLPG_MULTI:
1865 unsigned long vmask;
1866 cpumask_t pmask;
1867 if ( unlikely(get_user(vmask, (unsigned long *)op.arg2.vcpumask)) )
1869 okay = 0;
1870 break;
1872 pmask = vcpumask_to_pcpumask(d, vmask);
1873 cpus_and(pmask, pmask, d->cpumask);
1874 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
1875 flush_tlb_mask(pmask);
1876 else
1877 flush_tlb_one_mask(pmask, op.arg1.linear_addr);
1878 break;
1881 case MMUEXT_TLB_FLUSH_ALL:
1882 flush_tlb_mask(d->cpumask);
1883 break;
1885 case MMUEXT_INVLPG_ALL:
1886 flush_tlb_one_mask(d->cpumask, op.arg1.linear_addr);
1887 break;
1889 case MMUEXT_FLUSH_CACHE:
1890 if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) )
1892 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
1893 okay = 0;
1895 else
1897 wbinvd();
1899 break;
1901 case MMUEXT_SET_LDT:
1903 unsigned long ptr = op.arg1.linear_addr;
1904 unsigned long ents = op.arg2.nr_ents;
1906 if ( shadow_mode_external(d) )
1908 MEM_LOG("ignoring SET_LDT hypercall from external "
1909 "domain %u", d->domain_id);
1910 okay = 0;
1912 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
1913 (ents > 8192) ||
1914 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
1916 okay = 0;
1917 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
1919 else if ( (v->arch.guest_context.ldt_ents != ents) ||
1920 (v->arch.guest_context.ldt_base != ptr) )
1922 invalidate_shadow_ldt(v);
1923 v->arch.guest_context.ldt_base = ptr;
1924 v->arch.guest_context.ldt_ents = ents;
1925 load_LDT(v);
1926 percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT;
1927 if ( ents != 0 )
1928 percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT;
1930 break;
1933 default:
1934 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
1935 okay = 0;
1936 break;
1939 if ( unlikely(!okay) )
1941 rc = -EINVAL;
1942 break;
1945 uops++;
1948 out:
1949 process_deferred_ops(cpu);
1951 /* Add incremental work we have done to the @done output parameter. */
1952 if ( unlikely(pdone != NULL) )
1953 __put_user(done + i, pdone);
1955 UNLOCK_BIGLOCK(d);
1956 return rc;
1959 int do_mmu_update(
1960 mmu_update_t *ureqs,
1961 unsigned int count,
1962 unsigned int *pdone,
1963 unsigned int foreigndom)
1965 mmu_update_t req;
1966 void *va;
1967 unsigned long gpfn, mfn;
1968 struct pfn_info *page;
1969 int rc = 0, okay = 1, i = 0, cpu = smp_processor_id();
1970 unsigned int cmd, done = 0;
1971 struct vcpu *v = current;
1972 struct domain *d = v->domain;
1973 unsigned long type_info;
1974 struct domain_mmap_cache mapcache, sh_mapcache;
1976 LOCK_BIGLOCK(d);
1978 cleanup_writable_pagetable(d);
1980 if ( unlikely(shadow_mode_enabled(d)) )
1981 check_pagetable(v, "pre-mmu"); /* debug */
1983 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1985 count &= ~MMU_UPDATE_PREEMPTED;
1986 if ( unlikely(pdone != NULL) )
1987 (void)get_user(done, pdone);
1990 domain_mmap_cache_init(&mapcache);
1991 domain_mmap_cache_init(&sh_mapcache);
1993 if ( !set_foreigndom(cpu, foreigndom) )
1995 rc = -EINVAL;
1996 goto out;
1999 perfc_incrc(calls_to_mmu_update);
2000 perfc_addc(num_page_updates, count);
2001 perfc_incr_histo(bpt_updates, count, PT_UPDATES);
2003 if ( unlikely(!array_access_ok(ureqs, count, sizeof(req))) )
2005 rc = -EFAULT;
2006 goto out;
2009 for ( i = 0; i < count; i++ )
2011 if ( hypercall_preempt_check() )
2013 rc = hypercall4_create_continuation(
2014 __HYPERVISOR_mmu_update, ureqs,
2015 (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
2016 break;
2019 if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) )
2021 MEM_LOG("Bad __copy_from_user");
2022 rc = -EFAULT;
2023 break;
2026 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
2027 okay = 0;
2029 switch ( cmd )
2031 /*
2032 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
2033 */
2034 case MMU_NORMAL_PT_UPDATE:
2036 gpfn = req.ptr >> PAGE_SHIFT;
2037 mfn = __gpfn_to_mfn(d, gpfn);
2039 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2041 MEM_LOG("Could not get page for normal update");
2042 break;
2045 va = map_domain_page_with_cache(mfn, &mapcache);
2046 va = (void *)((unsigned long)va +
2047 (unsigned long)(req.ptr & ~PAGE_MASK));
2048 page = pfn_to_page(mfn);
2050 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
2052 case PGT_l1_page_table:
2053 ASSERT( !shadow_mode_refcounts(d) );
2054 if ( likely(get_page_type(
2055 page, type_info & (PGT_type_mask|PGT_va_mask))) )
2057 l1_pgentry_t l1e;
2059 /* FIXME: doesn't work with PAE */
2060 l1e = l1e_from_intpte(req.val);
2061 okay = mod_l1_entry(va, l1e);
2062 if ( okay && unlikely(shadow_mode_enabled(d)) )
2063 shadow_l1_normal_pt_update(
2064 d, req.ptr, l1e, &sh_mapcache);
2065 put_page_type(page);
2067 break;
2068 case PGT_l2_page_table:
2069 ASSERT( !shadow_mode_refcounts(d) );
2070 if ( likely(get_page_type(
2071 page, type_info & (PGT_type_mask|PGT_va_mask))) )
2073 l2_pgentry_t l2e;
2075 /* FIXME: doesn't work with PAE */
2076 l2e = l2e_from_intpte(req.val);
2077 okay = mod_l2_entry(
2078 (l2_pgentry_t *)va, l2e, mfn, type_info);
2079 if ( okay && unlikely(shadow_mode_enabled(d)) )
2080 shadow_l2_normal_pt_update(
2081 d, req.ptr, l2e, &sh_mapcache);
2082 put_page_type(page);
2084 break;
2085 #if CONFIG_PAGING_LEVELS >= 3
2086 case PGT_l3_page_table:
2087 ASSERT( !shadow_mode_refcounts(d) );
2088 if ( likely(get_page_type(
2089 page, type_info & (PGT_type_mask|PGT_va_mask))) )
2091 l3_pgentry_t l3e;
2093 /* FIXME: doesn't work with PAE */
2094 l3e = l3e_from_intpte(req.val);
2095 okay = mod_l3_entry(va, l3e, mfn, type_info);
2096 if ( okay && unlikely(shadow_mode_enabled(d)) )
2097 shadow_l3_normal_pt_update(
2098 d, req.ptr, l3e, &sh_mapcache);
2099 put_page_type(page);
2101 break;
2102 #endif
2103 #if CONFIG_PAGING_LEVELS >= 4
2104 case PGT_l4_page_table:
2105 ASSERT( !shadow_mode_refcounts(d) );
2106 if ( likely(get_page_type(
2107 page, type_info & (PGT_type_mask|PGT_va_mask))) )
2109 l4_pgentry_t l4e;
2111 l4e = l4e_from_intpte(req.val);
2112 okay = mod_l4_entry(va, l4e, mfn, type_info);
2113 if ( okay && unlikely(shadow_mode_enabled(d)) )
2114 shadow_l4_normal_pt_update(
2115 d, req.ptr, l4e, &sh_mapcache);
2116 put_page_type(page);
2118 break;
2119 #endif
2120 default:
2121 if ( likely(get_page_type(page, PGT_writable_page)) )
2123 if ( shadow_mode_enabled(d) )
2125 shadow_lock(d);
2127 __mark_dirty(d, mfn);
2129 if ( page_is_page_table(page) &&
2130 !page_out_of_sync(page) )
2132 shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
2136 *(intpte_t *)va = req.val;
2137 okay = 1;
2139 if ( shadow_mode_enabled(d) )
2140 shadow_unlock(d);
2142 put_page_type(page);
2144 break;
2147 unmap_domain_page_with_cache(va, &mapcache);
2149 put_page(page);
2150 break;
2152 case MMU_MACHPHYS_UPDATE:
2154 mfn = req.ptr >> PAGE_SHIFT;
2155 gpfn = req.val;
2157 /* HACK ALERT... Need to think about this some more... */
2158 if ( unlikely(shadow_mode_translate(FOREIGNDOM) && IS_PRIV(d)) )
2160 shadow_lock(FOREIGNDOM);
2161 printk("privileged guest dom%d requests pfn=%lx to "
2162 "map mfn=%lx for dom%d\n",
2163 d->domain_id, gpfn, mfn, FOREIGNDOM->domain_id);
2164 set_pfn_from_mfn(mfn, gpfn);
2165 set_p2m_entry(FOREIGNDOM, gpfn, mfn, &sh_mapcache, &mapcache);
2166 okay = 1;
2167 shadow_unlock(FOREIGNDOM);
2168 break;
2171 if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
2173 MEM_LOG("Could not get page for mach->phys update");
2174 break;
2177 if ( unlikely(shadow_mode_translate(FOREIGNDOM) && !IS_PRIV(d)) )
2179 MEM_LOG("can't mutate the m2p of translated guests");
2180 break;
2183 set_pfn_from_mfn(mfn, gpfn);
2184 okay = 1;
2186 mark_dirty(FOREIGNDOM, mfn);
2188 put_page(pfn_to_page(mfn));
2189 break;
2191 default:
2192 MEM_LOG("Invalid page update command %x", cmd);
2193 break;
2196 if ( unlikely(!okay) )
2198 rc = -EINVAL;
2199 break;
2202 ureqs++;
2205 out:
2206 domain_mmap_cache_destroy(&mapcache);
2207 domain_mmap_cache_destroy(&sh_mapcache);
2209 process_deferred_ops(cpu);
2211 /* Add incremental work we have done to the @done output parameter. */
2212 if ( unlikely(pdone != NULL) )
2213 __put_user(done + i, pdone);
2215 if ( unlikely(shadow_mode_enabled(d)) )
2216 check_pagetable(v, "post-mmu"); /* debug */
2218 UNLOCK_BIGLOCK(d);
2219 return rc;
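/*
 * Guest-side usage sketch (illustrative only: mmu_update_t and the MMU_*
 * command values come from the public interface headers, while
 * HYPERVISOR_mmu_update() and DOMID_SELF are the usual guest wrapper names,
 * assumed here rather than defined in this file). The low bits of 'ptr'
 * (below sizeof(l1_pgentry_t)) select the command, so PTE addresses must be
 * entry-aligned:
 *
 *     mmu_update_t u[2];
 *     u[0].ptr = pte_maddr | MMU_NORMAL_PT_UPDATE;        (request: *ptr = val)
 *     u[0].val = new_pte_val;
 *     u[1].ptr = ((u64)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
 *     u[1].val = pfn;                                     (request: M2P[mfn] = pfn)
 *     HYPERVISOR_mmu_update(u, 2, NULL, DOMID_SELF);
 */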
2223 static int create_grant_pte_mapping(
2224 unsigned long pte_addr, l1_pgentry_t _nl1e, struct vcpu *v)
2226 int rc = GNTST_okay;
2227 void *va;
2228 unsigned long gpfn, mfn;
2229 struct pfn_info *page;
2230 u32 type_info;
2231 l1_pgentry_t ol1e;
2232 struct domain *d = v->domain;
2234 ASSERT(spin_is_locked(&d->big_lock));
2235 ASSERT(!shadow_mode_refcounts(d));
2237 gpfn = pte_addr >> PAGE_SHIFT;
2238 mfn = __gpfn_to_mfn(d, gpfn);
2240 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2242 MEM_LOG("Could not get page for grant PTE update");
2243 return GNTST_general_error;
2246 va = map_domain_page(mfn);
2247 va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK));
2248 page = pfn_to_page(mfn);
2250 type_info = page->u.inuse.type_info;
2251 if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
2252 !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
2254 MEM_LOG("Grant map attempted to update a non-L1 page");
2255 rc = GNTST_general_error;
2256 goto failed;
2259 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) ||
2260 !update_l1e(va, ol1e, _nl1e) )
2262 put_page_type(page);
2263 rc = GNTST_general_error;
2264 goto failed;
2267 put_page_from_l1e(ol1e, d);
2269 if ( unlikely(shadow_mode_enabled(d)) )
2271 struct domain_mmap_cache sh_mapcache;
2272 domain_mmap_cache_init(&sh_mapcache);
2273 shadow_l1_normal_pt_update(d, pte_addr, _nl1e, &sh_mapcache);
2274 domain_mmap_cache_destroy(&sh_mapcache);
2277 put_page_type(page);
2279 failed:
2280 unmap_domain_page(va);
2281 put_page(page);
2282 return rc;
2285 static int destroy_grant_pte_mapping(
2286 unsigned long addr, unsigned long frame, struct domain *d)
2288 int rc = GNTST_okay;
2289 void *va;
2290 unsigned long gpfn, mfn;
2291 struct pfn_info *page;
2292 u32 type_info;
2293 l1_pgentry_t ol1e;
2295 ASSERT(!shadow_mode_refcounts(d));
2297 gpfn = addr >> PAGE_SHIFT;
2298 mfn = __gpfn_to_mfn(d, gpfn);
2300 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2302 MEM_LOG("Could not get page for grant PTE unmap");
2303 return GNTST_general_error;
2306 va = map_domain_page(mfn);
2307 va = (void *)((unsigned long)va + (addr & ~PAGE_MASK));
2308 page = pfn_to_page(mfn);
2310 type_info = page->u.inuse.type_info;
2311 if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) ||
2312 !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) )
2314 MEM_LOG("Grant unmap attempted to update a non-L1 page");
2315 rc = GNTST_general_error;
2316 goto failed;
2319 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
2321 put_page_type(page);
2322 rc = GNTST_general_error;
2323 goto failed;
2326 /* Check that the virtual address supplied is actually mapped to frame. */
2327 if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
2329 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2330 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
2331 put_page_type(page);
2332 rc = GNTST_general_error;
2333 goto failed;
2336 /* Delete pagetable entry. */
2337 if ( unlikely(__put_user(0, (intpte_t *)va)))
2339 MEM_LOG("Cannot delete PTE entry at %p", va);
2340 put_page_type(page);
2341 rc = GNTST_general_error;
2342 goto failed;
2345 if ( unlikely(shadow_mode_enabled(d)) )
2347 struct domain_mmap_cache sh_mapcache;
2348 domain_mmap_cache_init(&sh_mapcache);
2349 shadow_l1_normal_pt_update(d, addr, l1e_empty(), &sh_mapcache);
2350 domain_mmap_cache_destroy(&sh_mapcache);
2353 put_page_type(page);
2355 failed:
2356 unmap_domain_page(va);
2357 put_page(page);
2358 return rc;
2362 static int create_grant_va_mapping(
2363 unsigned long va, l1_pgentry_t _nl1e, struct vcpu *v)
2365 l1_pgentry_t *pl1e, ol1e;
2366 struct domain *d = v->domain;
2368 ASSERT(spin_is_locked(&d->big_lock));
2369 ASSERT(!shadow_mode_refcounts(d));
2371 /*
2372 * This is actually overkill - we don't need to sync the L1 itself,
2373 * just everything involved in getting to this L1 (i.e. we need
2374 * linear_pg_table[l1_linear_offset(va)] to be in sync)...
2375 */
2376 __shadow_sync_va(v, va);
2378 pl1e = &linear_pg_table[l1_linear_offset(va)];
2380 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
2381 !update_l1e(pl1e, ol1e, _nl1e) )
2382 return GNTST_general_error;
2384 put_page_from_l1e(ol1e, d);
2386 if ( unlikely(shadow_mode_enabled(d)) )
2387 shadow_do_update_va_mapping(va, _nl1e, v);
2389 return GNTST_okay;
2392 static int destroy_grant_va_mapping(
2393 unsigned long addr, unsigned long frame)
2395 l1_pgentry_t *pl1e, ol1e;
2397 pl1e = &linear_pg_table[l1_linear_offset(addr)];
2399 if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
2401 MEM_LOG("Could not find PTE entry for address %lx", addr);
2402 return GNTST_general_error;
2405 /*
2406 * Check that the virtual address supplied is actually mapped to
2407 * frame.
2408 */
2409 if ( unlikely(l1e_get_pfn(ol1e) != frame) )
2411 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2412 l1e_get_pfn(ol1e), addr, frame);
2413 return GNTST_general_error;
2416 /* Delete pagetable entry. */
2417 if ( unlikely(__put_user(0, &pl1e->l1)) )
2419 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
2420 return GNTST_general_error;
2423 return GNTST_okay;
2426 int create_grant_host_mapping(
2427 unsigned long addr, unsigned long frame, unsigned int flags)
2429 l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
2431 if ( (flags & GNTMAP_application_map) )
2432 l1e_add_flags(pte,_PAGE_USER);
2433 if ( !(flags & GNTMAP_readonly) )
2434 l1e_add_flags(pte,_PAGE_RW);
2436 if ( flags & GNTMAP_contains_pte )
2437 return create_grant_pte_mapping(addr, pte, current);
2438 return create_grant_va_mapping(addr, pte, current);
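/*
 * Summary of the flag translation above (no new behaviour):
 * GRANT_PTE_FLAGS supplies the base attributes, GNTMAP_application_map
 * additionally sets _PAGE_USER, and the absence of GNTMAP_readonly sets
 * _PAGE_RW. With GNTMAP_contains_pte, 'addr' is the (guest-)physical
 * address of the PTE to rewrite; otherwise it is a linear address resolved
 * through linear_pg_table.
 */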
2441 int destroy_grant_host_mapping(
2442 unsigned long addr, unsigned long frame, unsigned int flags)
2444 if ( flags & GNTMAP_contains_pte )
2445 return destroy_grant_pte_mapping(addr, frame, current->domain);
2446 return destroy_grant_va_mapping(addr, frame);
2449 int steal_page_for_grant_transfer(
2450 struct domain *d, struct pfn_info *page)
2452 u32 _d, _nd, x, y;
2454 spin_lock(&d->page_alloc_lock);
2456 /*
2457 * The tricky bit: atomically release ownership while there is just one
2458 * benign reference to the page (PGC_allocated). If that reference
2459 * disappears then the deallocation routine will safely spin.
2460 */
2461 _d = pickle_domptr(d);
2462 _nd = page->u.inuse._domain;
2463 y = page->count_info;
2464 do {
2465 x = y;
2466 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
2467 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
2468 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
2469 " caf=%08x, taf=%" PRtype_info "\n",
2470 (void *) page_to_pfn(page),
2471 d, d->domain_id, unpickle_domptr(_nd), x,
2472 page->u.inuse.type_info);
2473 spin_unlock(&d->page_alloc_lock);
2474 return -1;
2476 __asm__ __volatile__(
2477 LOCK_PREFIX "cmpxchg8b %2"
2478 : "=d" (_nd), "=a" (y),
2479 "=m" (*(volatile u64 *)(&page->count_info))
2480 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
2481 } while (unlikely(_nd != _d) || unlikely(y != x));
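/*
 * The cmpxchg8b above operates on the adjacent count_info and
 * u.inuse._domain fields as one 64-bit quantity. Its logical effect,
 * performed atomically (a sketch, not executed code):
 *
 *     if ( page->count_info == x && page->u.inuse._domain == _d )
 *     {
 *         page->count_info      = x;    (reference counts unchanged)
 *         page->u.inuse._domain = 0;    (ownership dropped: page anonymous)
 *     }
 *     else
 *     {
 *         y   = page->count_info;       (reload both halves and retry)
 *         _nd = page->u.inuse._domain;
 *     }
 */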
2483 /*
2484 * Unlink from 'd'. At least one reference remains (now anonymous), so
2485 * no one else is spinning to try to delete this page from 'd'.
2486 */
2487 d->tot_pages--;
2488 list_del(&page->list);
2490 spin_unlock(&d->page_alloc_lock);
2492 return 0;
2495 int do_update_va_mapping(unsigned long va, u64 val64,
2496 unsigned long flags)
2498 l1_pgentry_t val = l1e_from_intpte(val64);
2499 struct vcpu *v = current;
2500 struct domain *d = v->domain;
2501 unsigned int cpu = v->processor;
2502 unsigned long vmask, bmap_ptr;
2503 cpumask_t pmask;
2504 int rc = 0;
2506 perfc_incrc(calls_to_update_va);
2508 if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
2509 return -EINVAL;
2511 LOCK_BIGLOCK(d);
2513 cleanup_writable_pagetable(d);
2515 if ( unlikely(shadow_mode_enabled(d)) )
2516 check_pagetable(v, "pre-va"); /* debug */
2518 if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
2519 val)) )
2520 rc = -EINVAL;
2522 if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
2524 if ( unlikely(percpu_info[cpu].foreign &&
2525 (shadow_mode_translate(d) ||
2526 shadow_mode_translate(percpu_info[cpu].foreign))) )
2528 /*
2529 * The foreign domain's pfns are in a different namespace. There's
2530 * not enough information in just a gpte to figure out how to
2531 * (re-)shadow this entry.
2532 */
2533 domain_crash(d);
2536 rc = shadow_do_update_va_mapping(va, val, v);
2538 check_pagetable(v, "post-va"); /* debug */
2541 switch ( flags & UVMF_FLUSHTYPE_MASK )
2543 case UVMF_TLB_FLUSH:
2544 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2546 case UVMF_LOCAL:
2547 if ( unlikely(shadow_mode_enabled(d)) )
2548 shadow_sync_all(d);
2549 local_flush_tlb();
2550 break;
2551 case UVMF_ALL:
2552 flush_tlb_mask(d->cpumask);
2553 break;
2554 default:
2555 if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
2556 rc = -EFAULT;
2557 pmask = vcpumask_to_pcpumask(d, vmask);
2558 cpus_and(pmask, pmask, d->cpumask);
2559 flush_tlb_mask(pmask);
2560 break;
2562 break;
2564 case UVMF_INVLPG:
2565 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2567 case UVMF_LOCAL:
2568 if ( unlikely(shadow_mode_enabled(d)) )
2569 shadow_invlpg(current, va);
2570 local_flush_tlb_one(va);
2571 break;
2572 case UVMF_ALL:
2573 flush_tlb_one_mask(d->cpumask, va);
2574 break;
2575 default:
2576 if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
2577 rc = -EFAULT;
2578 pmask = vcpumask_to_pcpumask(d, vmask);
2579 cpus_and(pmask, pmask, d->cpumask);
2580 flush_tlb_one_mask(pmask, va);
2581 break;
2583 break;
2586 process_deferred_ops(cpu);
2588 UNLOCK_BIGLOCK(d);
2590 return rc;
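/*
 * Guest-side usage sketch (illustrative: the UVMF_* constants come from the
 * public interface and HYPERVISOR_update_va_mapping() is the usual guest
 * wrapper, assumed here rather than defined in this file). A PV guest
 * remapping one of its own linear addresses and flushing just that entry on
 * the local CPU would issue roughly:
 *
 *     HYPERVISOR_update_va_mapping(va, new_pte_val, UVMF_INVLPG | UVMF_LOCAL);
 *
 * Any other non-zero value in the bits above UVMF_FLUSHTYPE_MASK is treated
 * as a guest pointer to a VCPU bitmap (the 'default' cases above).
 */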
2593 int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
2594 unsigned long flags,
2595 domid_t domid)
2597 unsigned int cpu = smp_processor_id();
2598 struct domain *d;
2599 int rc;
2601 if ( unlikely(!IS_PRIV(current->domain)) )
2602 return -EPERM;
2604 percpu_info[cpu].foreign = d = find_domain_by_id(domid);
2605 if ( unlikely(d == NULL) )
2607 MEM_LOG("Unknown domain '%u'", domid);
2608 return -ESRCH;
2611 rc = do_update_va_mapping(va, val64, flags);
2613 return rc;
2618 /*************************
2619 * Descriptor Tables
2620 */
2622 void destroy_gdt(struct vcpu *v)
2624 int i;
2625 unsigned long pfn;
2627 v->arch.guest_context.gdt_ents = 0;
2628 for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
2630 if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
2631 put_page_and_type(pfn_to_page(pfn));
2632 v->arch.perdomain_ptes[i] = l1e_empty();
2633 v->arch.guest_context.gdt_frames[i] = 0;
2638 long set_gdt(struct vcpu *v,
2639 unsigned long *frames,
2640 unsigned int entries)
2642 struct domain *d = v->domain;
2643 /* NB. There are 512 8-byte entries per GDT page. */
2644 int i, nr_pages = (entries + 511) / 512;
2645 unsigned long pfn;
2647 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2648 return -EINVAL;
2650 shadow_sync_all(d);
2652 /* Check the pages in the new GDT. */
2653 for ( i = 0; i < nr_pages; i++ ) {
2654 pfn = frames[i];
2655 if ((pfn >= max_page) ||
2656 !get_page_and_type(pfn_to_page(pfn), d, PGT_gdt_page) )
2657 goto fail;
2660 /* Tear down the old GDT. */
2661 destroy_gdt(v);
2663 /* Install the new GDT. */
2664 v->arch.guest_context.gdt_ents = entries;
2665 for ( i = 0; i < nr_pages; i++ )
2667 v->arch.guest_context.gdt_frames[i] = frames[i];
2668 v->arch.perdomain_ptes[i] =
2669 l1e_from_pfn(frames[i], __PAGE_HYPERVISOR);
2672 return 0;
2674 fail:
2675 while ( i-- > 0 )
2676 put_page_and_type(pfn_to_page(frames[i]));
2677 return -EINVAL;
2681 long do_set_gdt(unsigned long *frame_list, unsigned int entries)
2683 int nr_pages = (entries + 511) / 512;
2684 unsigned long frames[16];
2685 long ret;
2687 /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */
2688 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2689 return -EINVAL;
2691 if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
2692 return -EFAULT;
2694 LOCK_BIGLOCK(current->domain);
2696 if ( (ret = set_gdt(current, frames, entries)) == 0 )
2697 local_flush_tlb();
2699 UNLOCK_BIGLOCK(current->domain);
2701 return ret;
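/*
 * Worked example (illustrative): installing a 720-entry GDT gives
 * nr_pages = (720 + 511) / 512 = 2, so the guest supplies two frame
 * numbers. set_gdt() takes a PGT_gdt_page type reference on each frame
 * (which excludes it being simultaneously writable or used as a page
 * table) and installs it via v->arch.perdomain_ptes[]; the local TLB
 * flush above then makes the new mapping visible.
 */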
2705 long do_update_descriptor(u64 pa, u64 desc)
2707 struct domain *dom = current->domain;
2708 unsigned long gpfn = pa >> PAGE_SHIFT;
2709 unsigned long mfn;
2710 unsigned int offset;
2711 struct desc_struct *gdt_pent, d;
2712 struct pfn_info *page;
2713 long ret = -EINVAL;
2715 offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
2717 *(u64 *)&d = desc;
2719 LOCK_BIGLOCK(dom);
2721 if ( !VALID_MFN(mfn = __gpfn_to_mfn(dom, gpfn)) ||
2722 (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
2723 (mfn >= max_page) ||
2724 !check_descriptor(&d) )
2726 UNLOCK_BIGLOCK(dom);
2727 return -EINVAL;
2730 page = pfn_to_page(mfn);
2731 if ( unlikely(!get_page(page, dom)) )
2733 UNLOCK_BIGLOCK(dom);
2734 return -EINVAL;
2737 /* Check if the given frame is in use in an unsafe context. */
2738 switch ( page->u.inuse.type_info & PGT_type_mask )
2740 case PGT_gdt_page:
2741 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
2742 goto out;
2743 break;
2744 case PGT_ldt_page:
2745 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
2746 goto out;
2747 break;
2748 default:
2749 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2750 goto out;
2751 break;
2754 if ( shadow_mode_enabled(dom) )
2756 shadow_lock(dom);
2758 __mark_dirty(dom, mfn);
2760 if ( page_is_page_table(page) && !page_out_of_sync(page) )
2761 shadow_mark_mfn_out_of_sync(current, gpfn, mfn);
2764 /* All is good so make the update. */
2765 gdt_pent = map_domain_page(mfn);
2766 memcpy(&gdt_pent[offset], &d, 8);
2767 unmap_domain_page(gdt_pent);
2769 if ( shadow_mode_enabled(dom) )
2770 shadow_unlock(dom);
2772 put_page_type(page);
2774 ret = 0; /* success */
2776 out:
2777 put_page(page);
2779 UNLOCK_BIGLOCK(dom);
2781 return ret;
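/*
 * Worked example (illustrative): for pa = (gpfn << PAGE_SHIFT) + 0x50 the
 * alignment test passes (0x50 is a multiple of sizeof(struct desc_struct)
 * == 8) and offset = 0x50 / 8 = 10, so descriptor slot 10 of the mapped
 * frame receives the new 8-byte, check_descriptor()-validated value.
 */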
2786 /*************************
2787 * Writable Pagetables
2788 */
2790 #ifdef VVERBOSE
2791 int ptwr_debug = 0x0;
2792 #define PTWR_PRINTK(_f, _a...) \
2793 do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 )
2794 #define PTWR_PRINT_WHICH (which ? 'I' : 'A')
2795 #else
2796 #define PTWR_PRINTK(_f, _a...) ((void)0)
2797 #endif
2800 #ifdef PERF_ARRAYS
2802 /**************** writable pagetables profiling functions *****************/
2804 #define ptwr_eip_buckets 256
2806 int ptwr_eip_stat_threshold[] = {1, 10, 50, 100, L1_PAGETABLE_ENTRIES};
2808 #define ptwr_eip_stat_thresholdN (sizeof(ptwr_eip_stat_threshold)/sizeof(int))
2810 typedef struct {
2811 unsigned long eip;
2812 domid_t id;
2813 u32 val[ptwr_eip_stat_thresholdN];
2814 } ptwr_eip_stat_t;
2816 ptwr_eip_stat_t ptwr_eip_stats[ptwr_eip_buckets];
2818 static inline unsigned int ptwr_eip_stat_hash( unsigned long eip, domid_t id )
2820 return (((unsigned long) id) ^ eip ^ (eip>>8) ^ (eip>>16) ^ (eip>>24)) %
2821 ptwr_eip_buckets;
2824 static void ptwr_eip_stat_inc(u32 *n)
2826 int i, j;
2828 if ( ++(*n) != 0 )
2829 return;
2831 *n = ~0;
2833 /* Re-scale all buckets. */
2834 for ( i = 0; i <ptwr_eip_buckets; i++ )
2835 for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
2836 ptwr_eip_stats[i].val[j] >>= 1;
2839 static void ptwr_eip_stat_update(unsigned long eip, domid_t id, int modified)
2841 int i, j, b;
2843 i = b = ptwr_eip_stat_hash(eip, id);
2845 do
2847 if ( !ptwr_eip_stats[i].eip )
2849 /* Bucket not yet in use: claim it for this (eip, id). */
2850 ptwr_eip_stats[i].eip = eip;
2851 ptwr_eip_stats[i].id = id;
2852 memset(ptwr_eip_stats[i].val,0, sizeof(ptwr_eip_stats[i].val));
2855 if ( ptwr_eip_stats[i].eip == eip )
2857 for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
2858 if ( modified <= ptwr_eip_stat_threshold[j] )
2859 break;
2860 BUG_ON(j >= ptwr_eip_stat_thresholdN);
2861 ptwr_eip_stat_inc(&ptwr_eip_stats[i].val[j]);
2862 return;
2865 i = (i+1) % ptwr_eip_buckets;
2867 while ( i != b );
2869 printk("ptwr_eip_stat: too many EIPs in use!\n");
2871 ptwr_eip_stat_print();
2872 ptwr_eip_stat_reset();
2875 void ptwr_eip_stat_reset(void)
2877 memset(ptwr_eip_stats, 0, sizeof(ptwr_eip_stats));
2880 void ptwr_eip_stat_print(void)
2882 struct domain *e;
2883 domid_t d;
2884 int i, j;
2886 for_each_domain( e )
2888 d = e->domain_id;
2890 for ( i = 0; i < ptwr_eip_buckets; i++ )
2892 if ( ptwr_eip_stats[i].eip && ptwr_eip_stats[i].id != d )
2893 continue;
2895 printk("D %d eip %08lx ",
2896 ptwr_eip_stats[i].id, ptwr_eip_stats[i].eip);
2898 for ( j = 0; j < ptwr_eip_stat_thresholdN; j++ )
2899 printk("<=%u %4u \t",
2900 ptwr_eip_stat_threshold[j],
2901 ptwr_eip_stats[i].val[j]);
2902 printk("\n");
2907 #else /* PERF_ARRAYS */
2909 #define ptwr_eip_stat_update(eip, id, modified) ((void)0)
2911 #endif
2913 /*******************************************************************/
2915 /* Re-validate a given p.t. page, given its prior snapshot */
2916 int revalidate_l1(
2917 struct domain *d, l1_pgentry_t *l1page, l1_pgentry_t *snapshot)
2919 l1_pgentry_t ol1e, nl1e;
2920 int modified = 0, i;
2922 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
2924 ol1e = snapshot[i];
2925 nl1e = l1page[i];
2927 if ( likely(l1e_get_intpte(ol1e) == l1e_get_intpte(nl1e)) )
2928 continue;
2930 /* Update number of entries modified. */
2931 modified++;
2933 /*
2934 * Fast path for PTEs that have merely been write-protected
2935 * (e.g., during a Unix fork()). A strict reduction in privilege.
2936 */
2937 if ( likely(l1e_get_intpte(ol1e) == (l1e_get_intpte(nl1e)|_PAGE_RW)) )
2939 if ( likely(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
2940 put_page_type(pfn_to_page(l1e_get_pfn(nl1e)));
2941 continue;
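/*
 * Example of the fast path above (illustrative values): ol1e carries flags
 * 0x67 (present|rw|user|accessed|dirty) and nl1e is identical except that
 * _PAGE_RW is clear (0x65). Only the writable type reference held on the
 * mapped frame needs dropping; no full get_page_from_l1e() revalidation is
 * required.
 */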
2944 if ( unlikely(!get_page_from_l1e(nl1e, d)) )
2946 /*
2947 * Make the remaining p.t's consistent before crashing, so the
2948 * reference counts are correct.
2949 */
2950 memcpy(&l1page[i], &snapshot[i],
2951 (L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
2953 /* Crash the offending domain. */
2954 MEM_LOG("ptwr: Could not revalidate l1 page");
2955 domain_crash(d);
2956 break;
2959 put_page_from_l1e(ol1e, d);
2962 return modified;
2966 /* Flush the given writable p.t. page and write-protect it again. */
2967 void ptwr_flush(struct domain *d, const int which)
2969 unsigned long l1va;
2970 l1_pgentry_t *pl1e, pte, *ptep;
2971 l2_pgentry_t *pl2e;
2972 unsigned int modified;
2974 #ifdef CONFIG_X86_64
2975 struct vcpu *v = current;
2976 extern void toggle_guest_mode(struct vcpu *);
2977 int user_mode = !(v->arch.flags & TF_kernel_mode);
2978 #endif
2980 ASSERT(!shadow_mode_enabled(d));
2982 if ( unlikely(d->arch.ptwr[which].vcpu != current) )
2983 /* Don't use write_ptbase: it may switch to guest_user on x86/64! */
2984 write_cr3(pagetable_get_paddr(
2985 d->arch.ptwr[which].vcpu->arch.guest_table));
2986 else
2987 TOGGLE_MODE();
2989 l1va = d->arch.ptwr[which].l1va;
2990 ptep = (l1_pgentry_t *)&linear_pg_table[l1_linear_offset(l1va)];
2992 /*
2993 * STEP 1. Write-protect the p.t. page so no more updates can occur.
2994 */
2996 if ( unlikely(__get_user(pte.l1, &ptep->l1)) )
2998 MEM_LOG("ptwr: Could not read pte at %p", ptep);
2999 /*
3000 * Really a bug. We could read this PTE during the initial fault,
3001 * and pagetables can't have changed in the meantime.
3002 */
3003 BUG();
3005 PTWR_PRINTK("[%c] disconnected_l1va at %p is %"PRIpte"\n",
3006 PTWR_PRINT_WHICH, ptep, pte.l1);
3007 l1e_remove_flags(pte, _PAGE_RW);
3009 /* Write-protect the p.t. page in the guest page table. */
3010 if ( unlikely(__put_user(pte, ptep)) )
3012 MEM_LOG("ptwr: Could not update pte at %p", ptep);
3013 /*
3014 * Really a bug. We could write this PTE during the initial fault,
3015 * and pagetables can't have changed in the meantime.
3016 */
3017 BUG();
3020 /* Ensure that there are no stale writable mappings in any TLB. */
3021 /* NB. INVLPG is a serialising instruction: flushes pending updates. */
3022 flush_tlb_one_mask(d->cpumask, l1va);
3023 PTWR_PRINTK("[%c] disconnected_l1va at %p now %"PRIpte"\n",
3024 PTWR_PRINT_WHICH, ptep, pte.l1);
3026 /*
3027 * STEP 2. Validate any modified PTEs.
3028 */
3030 pl1e = d->arch.ptwr[which].pl1e;
3031 modified = revalidate_l1(d, pl1e, d->arch.ptwr[which].page);
3032 unmap_domain_page(pl1e);
3033 perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
3034 ptwr_eip_stat_update(d->arch.ptwr[which].eip, d->domain_id, modified);
3035 d->arch.ptwr[which].prev_nr_updates = modified;
3037 /*
3038 * STEP 3. Reattach the L1 p.t. page into the current address space.
3039 */
3041 if ( which == PTWR_PT_ACTIVE )
3043 pl2e = &__linear_l2_table[d->arch.ptwr[which].l2_idx];
3044 l2e_add_flags(*pl2e, _PAGE_PRESENT);
3047 /*
3048 * STEP 4. Final tidy-up.
3049 */
3051 d->arch.ptwr[which].l1va = 0;
3053 if ( unlikely(d->arch.ptwr[which].vcpu != current) )
3054 write_ptbase(current);
3055 else
3056 TOGGLE_MODE();
3059 static int ptwr_emulated_update(
3060 unsigned long addr,
3061 physaddr_t old,
3062 physaddr_t val,
3063 unsigned int bytes,
3064 unsigned int do_cmpxchg)
3066 unsigned long pfn, l1va;
3067 struct pfn_info *page;
3068 l1_pgentry_t pte, ol1e, nl1e, *pl1e;
3069 struct domain *d = current->domain;
3071 /* Aligned access only, thank you. */
3072 if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
3074 MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
3075 bytes, addr);
3076 return X86EMUL_UNHANDLEABLE;
3079 /* Turn a sub-word access into a full-word access. */
3080 if ( bytes != sizeof(physaddr_t) )
3082 int rc;
3083 physaddr_t full;
3084 unsigned int offset = addr & (sizeof(physaddr_t)-1);
3086 /* Align address; read full word. */
3087 addr &= ~(sizeof(physaddr_t)-1);
3088 if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
3089 sizeof(physaddr_t))) )
3090 return rc;
3091 /* Mask out bits provided by caller. */
3092 full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
3093 /* Shift the caller value and OR in the missing bits. */
3094 val &= (((physaddr_t)1 << (bytes*8)) - 1);
3095 val <<= (offset)*8;
3096 val |= full;
3097 /* Also fill in missing parts of the cmpxchg old value. */
3098 old &= (((physaddr_t)1 << (bytes*8)) - 1);
3099 old <<= (offset)*8;
3100 old |= full;
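/*
 * Worked example of the widening above (illustrative, non-PAE so
 * sizeof(physaddr_t) == 4): a 2-byte write of val = 0xBEEF at offset 2 of
 * a PTE currently holding 0x12345678 reads full = 0x12345678, masks it to
 * 0x00005678, shifts the caller's value to 0xBEEF0000, and ORs the two,
 * giving val = 0xBEEF5678; 'old' is completed the same way for the
 * cmpxchg case.
 */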
3103 /*
3104 * We must not emulate an update to a PTE that is temporarily marked
3105 * writable by the batched ptwr logic, else we can corrupt page refcnts!
3106 */
3107 if ( ((l1va = d->arch.ptwr[PTWR_PT_ACTIVE].l1va) != 0) &&
3108 (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
3109 ptwr_flush(d, PTWR_PT_ACTIVE);
3110 if ( ((l1va = d->arch.ptwr[PTWR_PT_INACTIVE].l1va) != 0) &&
3111 (l1_linear_offset(l1va) == l1_linear_offset(addr)) )
3112 ptwr_flush(d, PTWR_PT_INACTIVE);
3114 /* Read the PTE that maps the page being updated. */
3115 if (__copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
3116 sizeof(pte)))
3118 MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
3119 return X86EMUL_UNHANDLEABLE;
3122 pfn = l1e_get_pfn(pte);
3123 page = pfn_to_page(pfn);
3125 /* We are looking only for read-only mappings of p.t. pages. */
3126 if ( ((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) != _PAGE_PRESENT) ||
3127 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3128 (page_get_owner(page) != d) )
3130 MEM_LOG("ptwr_emulate: Page is mistyped or bad pte "
3131 "(%lx, %" PRtype_info ")",
3132 l1e_get_pfn(pte), page->u.inuse.type_info);
3133 return X86EMUL_UNHANDLEABLE;
3136 /* Check the new PTE. */
3137 nl1e = l1e_from_intpte(val);
3138 if ( unlikely(!get_page_from_l1e(nl1e, d)) )
3140 MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
3141 return X86EMUL_UNHANDLEABLE;
3144 /* Checked successfully: do the update (write or cmpxchg). */
3145 pl1e = map_domain_page(page_to_pfn(page));
3146 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
3147 if ( do_cmpxchg )
3149 ol1e = l1e_from_intpte(old);
3150 if ( cmpxchg((intpte_t *)pl1e, old, val) != old )
3152 unmap_domain_page(pl1e);
3153 put_page_from_l1e(nl1e, d);
3154 return X86EMUL_CMPXCHG_FAILED;
3157 else
3159 ol1e = *pl1e;
3160 *pl1e = nl1e;
3162 unmap_domain_page(pl1e);
3164 /* Finally, drop the old PTE. */
3165 put_page_from_l1e(ol1e, d);
3167 return X86EMUL_CONTINUE;
3170 static int ptwr_emulated_write(
3171 unsigned long addr,
3172 unsigned long val,
3173 unsigned int bytes)
3175 return ptwr_emulated_update(addr, 0, val, bytes, 0);
3178 static int ptwr_emulated_cmpxchg(
3179 unsigned long addr,
3180 unsigned long old,
3181 unsigned long new,
3182 unsigned int bytes)
3184 return ptwr_emulated_update(addr, old, new, bytes, 1);
3187 static int ptwr_emulated_cmpxchg8b(
3188 unsigned long addr,
3189 unsigned long old,
3190 unsigned long old_hi,
3191 unsigned long new,
3192 unsigned long new_hi)
3194 return ptwr_emulated_update(
3195 addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
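/*
 * PAE guests update 64-bit PTEs with cmpxchg8b; the two 32-bit halves are
 * recombined here, e.g. old_hi = 0x00000001, old = 0x00000067 yields the
 * 64-bit comparand 0x0000000100000067 passed to ptwr_emulated_update().
 */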
3198 static struct x86_mem_emulator ptwr_mem_emulator = {
3199 .read_std = x86_emulate_read_std,
3200 .write_std = x86_emulate_write_std,
3201 .read_emulated = x86_emulate_read_std,
3202 .write_emulated = ptwr_emulated_write,
3203 .cmpxchg_emulated = ptwr_emulated_cmpxchg,
3204 .cmpxchg8b_emulated = ptwr_emulated_cmpxchg8b
3205 };
3207 /* Write page fault handler: check if guest is trying to modify a PTE. */
3208 int ptwr_do_page_fault(struct domain *d, unsigned long addr,
3209 struct cpu_user_regs *regs)
3211 unsigned long pfn;
3212 struct pfn_info *page;
3213 l1_pgentry_t pte;
3214 l2_pgentry_t *pl2e, l2e;
3215 int which, flags;
3216 unsigned long l2_idx;
3218 if ( unlikely(shadow_mode_enabled(d)) )
3219 return 0;
3221 /*
3222 * Attempt to read the PTE that maps the VA being accessed. By checking for
3223 * PDE validity in the L2 we avoid many expensive fixups in __get_user().
3224 */
3225 if ( !(l2e_get_flags(__linear_l2_table[l2_linear_offset(addr)]) &
3226 _PAGE_PRESENT) ||
3227 __copy_from_user(&pte,&linear_pg_table[l1_linear_offset(addr)],
3228 sizeof(pte)) )
3230 return 0;
3233 pfn = l1e_get_pfn(pte);
3234 page = pfn_to_page(pfn);
3236 #ifdef CONFIG_X86_64
3237 #define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT | _PAGE_USER)
3238 #else
3239 #define WRPT_PTE_FLAGS (_PAGE_RW | _PAGE_PRESENT)
3240 #endif
3242 /*
3243 * Check the required flags for a valid wrpt mapping. If the page is
3244 * already writable then we can return straight to the guest (SMP race).
3245 * We decide whether or not to propagate the fault by testing for write
3246 * permissions in page directories by writing back to the linear mapping.
3247 */
3248 if ( (flags = l1e_get_flags(pte) & WRPT_PTE_FLAGS) == WRPT_PTE_FLAGS )
3249 return !__put_user(
3250 pte.l1, &linear_pg_table[l1_linear_offset(addr)].l1);
3252 /* We are looking only for read-only mappings of p.t. pages. */
3253 if ( ((flags | _PAGE_RW) != WRPT_PTE_FLAGS) ||
3254 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3255 ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
3256 (page_get_owner(page) != d) )
3258 return 0;
3261 #if 0 /* Leave this in as useful for debugging */
3262 goto emulate;
3263 #endif
3265 PTWR_PRINTK("ptwr_page_fault on l1 pt at va %lx, pfn %lx, eip %lx\n",
3266 addr, pfn, (unsigned long)regs->eip);
3268 /* Get the L2 index at which this L1 p.t. is always mapped. */
3269 l2_idx = page->u.inuse.type_info & PGT_va_mask;
3270 if ( unlikely(l2_idx >= PGT_va_unknown) )
3271 goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
3272 l2_idx >>= PGT_va_shift;
3274 if ( unlikely(l2_idx == l2_linear_offset(addr)) )
3275 goto emulate; /* Urk! Pagetable maps itself! */
3277 /*
3278 * Is the L1 p.t. mapped into the current address space? If so we call it
3279 * an ACTIVE p.t., otherwise it is INACTIVE.
3280 */
3281 pl2e = &__linear_l2_table[l2_idx];
3282 which = PTWR_PT_INACTIVE;
3284 if ( (__get_user(l2e.l2, &pl2e->l2) == 0) && (l2e_get_pfn(l2e) == pfn) )
3286 /*
3287 * Check the PRESENT bit to set ACTIVE mode.
3288 * If the PRESENT bit is clear, we may be conflicting with the current
3289 * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
3290 * The ptwr_flush call below will restore the PRESENT bit.
3291 */
3292 if ( likely(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
3293 (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
3294 (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
3295 which = PTWR_PT_ACTIVE;
3298 /*
3299 * If this is a multi-processor guest then ensure that the page is hooked
3300 * into at most one L2 table, which must be the one running on this VCPU.
3301 */
3302 if ( (d->vcpu[0]->next_in_list != NULL) &&
3303 ((page->u.inuse.type_info & PGT_count_mask) !=
3304 (!!(page->u.inuse.type_info & PGT_pinned) +
3305 (which == PTWR_PT_ACTIVE))) )
3307 /* Could be conflicting writable mappings from other VCPUs. */
3308 cleanup_writable_pagetable(d);
3309 goto emulate;
3312 /*
3313 * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a
3314 * time. If there is already one, we must flush it out.
3315 */
3316 if ( d->arch.ptwr[which].l1va )
3317 ptwr_flush(d, which);
3319 /*
3320 * If the last batch made no updates then we are probably stuck. Emulate this
3321 * update to ensure we make progress.
3322 */
3323 if ( d->arch.ptwr[which].prev_nr_updates == 0 )
3325 /* Ensure that we don't get stuck in an emulation-only rut. */
3326 d->arch.ptwr[which].prev_nr_updates = 1;
3327 goto emulate;
3330 PTWR_PRINTK("[%c] batched ptwr_page_fault at va %lx, pt for %08lx, "
3331 "pfn %lx\n", PTWR_PRINT_WHICH, addr,
3332 l2_idx << L2_PAGETABLE_SHIFT, pfn);
3334 d->arch.ptwr[which].l1va = addr | 1;
3335 d->arch.ptwr[which].l2_idx = l2_idx;
3336 d->arch.ptwr[which].vcpu = current;
3338 #ifdef PERF_ARRAYS
3339 d->arch.ptwr[which].eip = regs->eip;
3340 #endif
3342 /* For safety, disconnect the L1 p.t. page from current space. */
3343 if ( which == PTWR_PT_ACTIVE )
3345 l2e_remove_flags(*pl2e, _PAGE_PRESENT);
3346 flush_tlb_mask(d->cpumask);
3349 /* Temporarily map the L1 page, and make a copy of it. */
3350 d->arch.ptwr[which].pl1e = map_domain_page(pfn);
3351 memcpy(d->arch.ptwr[which].page,
3352 d->arch.ptwr[which].pl1e,
3353 L1_PAGETABLE_ENTRIES * sizeof(l1_pgentry_t));
3355 /* Finally, make the p.t. page writable by the guest OS. */
3356 l1e_add_flags(pte, _PAGE_RW);
3357 if ( unlikely(__put_user(pte.l1,
3358 &linear_pg_table[l1_linear_offset(addr)].l1)) )
3360 MEM_LOG("ptwr: Could not update pte at %p", (unsigned long *)
3361 &linear_pg_table[l1_linear_offset(addr)]);
3362 /* Toss the writable pagetable state and crash. */
3363 unmap_domain_page(d->arch.ptwr[which].pl1e);
3364 d->arch.ptwr[which].l1va = 0;
3365 domain_crash(d);
3366 return 0;
3369 return EXCRET_fault_fixed;
3371 emulate:
3372 if ( x86_emulate_memop(guest_cpu_user_regs(), addr,
3373 &ptwr_mem_emulator, BITS_PER_LONG/8) )
3374 return 0;
3375 perfc_incrc(ptwr_emulations);
3376 return EXCRET_fault_fixed;
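/*
 * Rough decision flow of the handler above (a summary, no new logic):
 *  1. Faults that are not recognisable writes to a read-only L1 page owned
 *     by this domain return 0 and take the normal fault path.
 *  2. If the PTE is already writable, the same value is written back
 *     through the linear mapping (testing directory-level permission) and
 *     the guest is resumed.
 *  3. L1s mapped in multiple L2 slots, pagetables that map themselves,
 *     pages possibly mapped writable by another VCPU, and batches that
 *     made no progress last time are emulated one access at a time via
 *     x86_emulate_memop().
 *  4. Otherwise the L1 is disconnected (ACTIVE case), snapshotted, and
 *     temporarily made writable; ptwr_flush()/revalidate_l1() later
 *     revalidate whatever the guest modified.
 */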
3379 int ptwr_init(struct domain *d)
3381 void *x = alloc_xenheap_page();
3382 void *y = alloc_xenheap_page();
3384 if ( (x == NULL) || (y == NULL) )
3386 free_xenheap_page(x);
3387 free_xenheap_page(y);
3388 return -ENOMEM;
3391 d->arch.ptwr[PTWR_PT_ACTIVE].page = x;
3392 d->arch.ptwr[PTWR_PT_INACTIVE].page = y;
3394 return 0;
3397 void ptwr_destroy(struct domain *d)
3399 LOCK_BIGLOCK(d);
3400 cleanup_writable_pagetable(d);
3401 UNLOCK_BIGLOCK(d);
3402 free_xenheap_page(d->arch.ptwr[PTWR_PT_ACTIVE].page);
3403 free_xenheap_page(d->arch.ptwr[PTWR_PT_INACTIVE].page);
3406 void cleanup_writable_pagetable(struct domain *d)
3408 if ( unlikely(!VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
3409 return;
3411 if ( unlikely(shadow_mode_enabled(d)) )
3413 shadow_sync_all(d);
3415 else
3417 if ( d->arch.ptwr[PTWR_PT_ACTIVE].l1va )
3418 ptwr_flush(d, PTWR_PT_ACTIVE);
3419 if ( d->arch.ptwr[PTWR_PT_INACTIVE].l1va )
3420 ptwr_flush(d, PTWR_PT_INACTIVE);
3424 int map_pages_to_xen(
3425 unsigned long virt,
3426 unsigned long pfn,
3427 unsigned long nr_pfns,
3428 unsigned long flags)
3430 l2_pgentry_t *pl2e, ol2e;
3431 l1_pgentry_t *pl1e, ol1e;
3432 unsigned int i;
3434 unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
3435 flags &= ~MAP_SMALL_PAGES;
3437 while ( nr_pfns != 0 )
3439 pl2e = virt_to_xen_l2e(virt);
3441 if ( ((((virt>>PAGE_SHIFT) | pfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
3442 (nr_pfns >= (1<<PAGETABLE_ORDER)) &&
3443 !map_small_pages )
3445 /* Super-page mapping. */
3446 ol2e = *pl2e;
3447 *pl2e = l2e_from_pfn(pfn, flags|_PAGE_PSE);
3449 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
3451 local_flush_tlb_pge();
3452 if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
3453 free_xen_pagetable(l2e_get_page(*pl2e));
3456 virt += 1UL << L2_PAGETABLE_SHIFT;
3457 pfn += 1UL << PAGETABLE_ORDER;
3458 nr_pfns -= 1UL << PAGETABLE_ORDER;
3460 else
3462 /* Normal page mapping. */
3463 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
3465 pl1e = page_to_virt(alloc_xen_pagetable());
3466 clear_page(pl1e);
3467 *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR);
3469 else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
3471 pl1e = page_to_virt(alloc_xen_pagetable());
3472 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3473 pl1e[i] = l1e_from_pfn(
3474 l2e_get_pfn(*pl2e) + i,
3475 l2e_get_flags(*pl2e) & ~_PAGE_PSE);
3476 *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR);
3477 local_flush_tlb_pge();
3480 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
3481 ol1e = *pl1e;
3482 *pl1e = l1e_from_pfn(pfn, flags);
3483 if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
3484 local_flush_tlb_one(virt);
3486 virt += 1UL << L1_PAGETABLE_SHIFT;
3487 pfn += 1UL;
3488 nr_pfns -= 1UL;
3492 return 0;
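/*
 * Example (illustrative): with 4kB pages and PAGETABLE_ORDER == 9 (PAE and
 * x86-64; 10 on non-PAE x86-32), a call such as
 *
 *     map_pages_to_xen(virt, pfn, 1UL << PAGETABLE_ORDER, __PAGE_HYPERVISOR);
 *
 * with virt >> PAGE_SHIFT and pfn both superpage-aligned is satisfied by a
 * single _PAGE_PSE entry in the L2; anything else, or MAP_SMALL_PAGES (as
 * used by the memguard code below), falls through to per-page L1 mappings.
 */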
3495 void __set_fixmap(
3496 enum fixed_addresses idx, unsigned long p, unsigned long flags)
3498 if ( unlikely(idx >= __end_of_fixed_addresses) )
3499 BUG();
3500 map_pages_to_xen(fix_to_virt(idx), p >> PAGE_SHIFT, 1, flags);
3503 #ifdef MEMORY_GUARD
3505 void memguard_init(void)
3507 map_pages_to_xen(
3508 PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
3509 __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
3512 static void __memguard_change_range(void *p, unsigned long l, int guard)
3514 unsigned long _p = (unsigned long)p;
3515 unsigned long _l = (unsigned long)l;
3516 unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
3518 /* Ensure we are dealing with a page-aligned whole number of pages. */
3519 ASSERT((_p&PAGE_MASK) != 0);
3520 ASSERT((_l&PAGE_MASK) != 0);
3521 ASSERT((_p&~PAGE_MASK) == 0);
3522 ASSERT((_l&~PAGE_MASK) == 0);
3524 if ( guard )
3525 flags &= ~_PAGE_PRESENT;
3527 map_pages_to_xen(
3528 _p, virt_to_phys(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
3531 void memguard_guard_range(void *p, unsigned long l)
3533 __memguard_change_range(p, l, 1);
3536 void memguard_unguard_range(void *p, unsigned long l)
3538 __memguard_change_range(p, l, 0);
3541 #endif
3543 /*
3544 * Local variables:
3545 * mode: C
3546 * c-set-style: "BSD"
3547 * c-basic-offset: 4
3548 * tab-width: 4
3549 * indent-tabs-mode: nil
3550 * End:
3551 */