ia64/xen-unstable: xen/arch/x86/mm.c @ 13481:e5f585514b16

[XEN] Simplify CR3 switching code (remove slow path that allows old
page tables to be writable in new page tables -- shadow mode cannot
handle the failure case and it is easy to work around this in the
guest anyway). Also remove broken shadow-refcount compat-mode case.
Signed-off-by: Keir Fraser <keir@xensource.com>

author   kaf24@localhost.localdomain
date     Wed Jan 17 22:27:56 2007 +0000
parents  05c1db1b8cb9
children 271ffb1c12eb

line source
1 /******************************************************************************
2 * arch/x86/mm.c
3 *
4 * Copyright (c) 2002-2005 K A Fraser
5 * Copyright (c) 2004 Christian Limpach
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * A description of the x86 page table API:
24 *
25 * Domains trap to do_mmu_update with a list of update requests.
26 * This is a list of (ptr, val) pairs, where the requested operation
27 * is *ptr = val.
28 *
29 * Reference counting of pages:
30 * ----------------------------
31 * Each page has two refcounts: tot_count and type_count.
32 *
33 * TOT_COUNT is the obvious reference count. It counts all uses of a
34 * physical page frame by a domain, including uses as a page directory,
35 * a page table, or simple mappings via a PTE. This count prevents a
36 * domain from releasing a frame back to the free pool when it still holds
37 * a reference to it.
38 *
39 * TYPE_COUNT is more subtle. A frame can be put to one of three
40 * mutually-exclusive uses: it might be used as a page directory, or a
41 * page table, or it may be mapped writable by the domain [of course, a
42 * frame need not be used in any of these three ways!].
43 * So, type_count is a count of the number of times a frame is being
44 * referred to in its current incarnation. Therefore, a page can only
45 * change its type when its type count is zero.
46 *
47 * Pinning the page type:
48 * ----------------------
49 * The type of a page can be pinned/unpinned with the commands
50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
51 * pinning is not reference counted, so it can't be nested).
52 * This is useful to prevent a page's type count falling to zero, at which
53 * point safety checks would need to be carried out next time the count
54 * is increased again.
55 *
56 * A further note on writable page mappings:
57 * -----------------------------------------
58 * For simplicity, the count of writable mappings for a page may not
59 * correspond to reality. The 'writable count' is incremented for every
60 * PTE which maps the page with the _PAGE_RW flag set. However, for
61 * write access to be possible the page directory entry must also have
62 * its _PAGE_RW bit set. We do not check this as it complicates the
63 * reference counting considerably [consider the case of multiple
64 * directory entries referencing a single page table, some with the RW
65 * bit set, others not -- it starts getting a bit messy].
66 * In normal use, this simplification shouldn't be a problem.
67 * However, the logic can be added if required.
68 *
69 * One more note on read-only page mappings:
70 * -----------------------------------------
71 * We want domains to be able to map pages for read-only access. The
72 * main reason is that page tables and directories should be readable
73 * by a domain, but it would not be safe for them to be writable.
74 * However, domains have free access to rings 1 & 2 of the Intel
75 * privilege model. In terms of page protection, these are considered
76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
77 * read-only restrictions are respected in supervisor mode -- if the
78 * bit is clear then any mapped page is writable.
79 *
80 * We get round this by always setting the WP bit and disallowing
81 * updates to it. This is very unlikely to cause a problem for guest
82 * OS's, which will generally use the WP bit to simplify copy-on-write
83 * implementation (in that case, OS wants a fault when it writes to
84 * an application-supplied buffer).
85 */
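/*
 * As a concrete illustration of the (ptr, val) interface described above,
 * a guest that has just built a new L1 page table could pin it and hook it
 * into an L2 roughly as follows.  This is a minimal guest-side sketch only:
 * it assumes the structures and constants from the public headers
 * (struct mmu_update, struct mmuext_op, MMU_NORMAL_PT_UPDATE, etc.), the
 * usual HYPERVISOR_mmu_update()/HYPERVISOR_mmuext_op() hypercall wrappers,
 * and that the guest no longer maps the new L1 frame writable.
 *
 *     struct mmuext_op pin = {
 *         .cmd      = MMUEXT_PIN_L1_TABLE,
 *         .arg1.mfn = l1_mfn,                    // MFN of the new L1 table
 *     };
 *     struct mmu_update req = {
 *         // Machine address of the L2 slot that will point at the new L1;
 *         // the low bits of 'ptr' encode the update type.
 *         .ptr = ((uint64_t)l2_mfn << PAGE_SHIFT)
 *                + l2_slot * sizeof(l2_pgentry_t)
 *                + MMU_NORMAL_PT_UPDATE,
 *         .val = ((uint64_t)l1_mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW,
 *     };
 *
 *     // Pin first so the table's type count cannot fall back to zero...
 *     HYPERVISOR_mmuext_op(&pin, 1, NULL, DOMID_SELF);
 *     // ...then install it with a single (ptr, val) update request.
 *     HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
 */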
87 #include <xen/config.h>
88 #include <xen/init.h>
89 #include <xen/kernel.h>
90 #include <xen/lib.h>
91 #include <xen/mm.h>
92 #include <xen/domain.h>
93 #include <xen/sched.h>
94 #include <xen/errno.h>
95 #include <xen/perfc.h>
96 #include <xen/irq.h>
97 #include <xen/softirq.h>
98 #include <xen/domain_page.h>
99 #include <xen/event.h>
100 #include <xen/iocap.h>
101 #include <xen/guest_access.h>
102 #include <asm/shadow.h>
103 #include <asm/page.h>
104 #include <asm/flushtlb.h>
105 #include <asm/io.h>
106 #include <asm/ldt.h>
107 #include <asm/x86_emulate.h>
108 #include <asm/e820.h>
109 #include <asm/hypercall.h>
110 #include <public/memory.h>
112 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
114 /*
115 * PTE updates can be done with ordinary writes except:
116 * 1. Debug builds get extra checking by using CMPXCHG[8B].
117 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
118 */
119 #if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
120 #define PTE_UPDATE_WITH_CMPXCHG
121 #endif
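/*
 * The PAE case matters for correctness, not just for debugging: a present
 * PAE PTE is 64 bits wide, so updating it with two 32-bit stores could be
 * observed half-written by the page walker, whereas CMPXCHG8B performs the
 * store atomically.  A minimal sketch of the two flavours (illustration
 * only -- the real logic, including shadow-mode handling, lives in
 * update_intpte() below):
 *
 *     static inline void example_write_pte(intpte_t *p, intpte_t old,
 *                                          intpte_t new)
 *     {
 *     #ifndef PTE_UPDATE_WITH_CMPXCHG
 *         *p = new;                        // plain store is sufficient
 *     #else
 *         intpte_t seen, expect = old;
 *         while ( (seen = cmpxchg(p, expect, new)) != expect )
 *             expect = seen;               // raced with A/D-bit updates; retry
 *     #endif
 *     }
 */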
123 /* Used to defer flushing of memory structures. */
124 struct percpu_mm_info {
125 #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */
126 #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
127 #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */
128 unsigned int deferred_ops;
129 /* If non-NULL, specifies a foreign subject domain for some operations. */
130 struct domain *foreign;
131 };
132 static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
134 /*
135 * Returns the current foreign domain; defaults to the currently-executing
136 * domain if a foreign override hasn't been specified.
137 */
138 #define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
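/*
 * For example, a privileged domain building a new guest passes the new
 * guest's domid as the final hypercall argument, so that operations such as
 * pinning are performed against, and accounted to, the foreign domain rather
 * than the caller.  Guest-side sketch, with hypothetical names, assuming the
 * usual HYPERVISOR_mmuext_op() wrapper:
 *
 *     struct mmuext_op op = {
 *         .cmd      = MMUEXT_PIN_L2_TABLE,   // L3/L4 for PAE/64-bit guests
 *         .arg1.mfn = new_guest_pgd_mfn,     // frame owned by 'domid'
 *     };
 *     HYPERVISOR_mmuext_op(&op, 1, NULL, domid);  // domid selects FOREIGNDOM
 */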
140 /* Private domain structs for DOMID_XEN and DOMID_IO. */
141 static struct domain *dom_xen, *dom_io;
143 /* Frame table and its size in pages. */
144 struct page_info *frame_table;
145 unsigned long max_page;
146 unsigned long total_pages;
148 #ifdef CONFIG_COMPAT
149 l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
150 #define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
151 L3_DISALLOW_MASK : \
152 COMPAT_L3_DISALLOW_MASK)
153 #else
154 #define l3_disallow_mask(d) L3_DISALLOW_MASK
155 #endif
157 static void queue_deferred_ops(struct domain *d, unsigned int ops)
158 {
159 if ( d == current->domain )
160 this_cpu(percpu_mm_info).deferred_ops |= ops;
161 else
162 BUG_ON(!test_bit(_DOMF_paused, &d->domain_flags) ||
163 !cpus_empty(d->domain_dirty_cpumask));
164 }
166 void __init init_frametable(void)
167 {
168 unsigned long nr_pages, page_step, i, mfn;
170 frame_table = (struct page_info *)FRAMETABLE_VIRT_START;
172 nr_pages = PFN_UP(max_page * sizeof(*frame_table));
173 page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
175 for ( i = 0; i < nr_pages; i += page_step )
176 {
177 mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
178 if ( mfn == 0 )
179 panic("Not enough memory for frame table\n");
180 map_pages_to_xen(
181 FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
182 mfn, page_step, PAGE_HYPERVISOR);
183 }
185 memset(frame_table, 0, nr_pages << PAGE_SHIFT);
186 }
188 void arch_init_memory(void)
189 {
190 extern void subarch_init_memory(void);
192 unsigned long i, pfn, rstart_pfn, rend_pfn;
194 /*
195 * Initialise our DOMID_XEN domain.
196 * Any Xen-heap pages that we will allow to be mapped will have
197 * their domain field set to dom_xen.
198 */
199 dom_xen = alloc_domain(DOMID_XEN);
200 BUG_ON(dom_xen == NULL);
202 /*
203 * Initialise our DOMID_IO domain.
204 * This domain owns I/O pages that are within the range of the page_info
205 * array. Mappings occur at the privilege level of the caller.
206 */
207 dom_io = alloc_domain(DOMID_IO);
208 BUG_ON(dom_io == NULL);
210 /* First 1MB of RAM is historically marked as I/O. */
211 for ( i = 0; i < 0x100; i++ )
212 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);
214 /* Any areas not specified as RAM by the e820 map are considered I/O. */
215 for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
216 {
217 if ( e820.map[i].type != E820_RAM )
218 continue;
219 /* Every page from cursor to start of next RAM region is I/O. */
220 rstart_pfn = PFN_UP(e820.map[i].addr);
221 rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
222 for ( ; pfn < rstart_pfn; pfn++ )
223 {
224 BUG_ON(!mfn_valid(pfn));
225 share_xen_page_with_guest(
226 mfn_to_page(pfn), dom_io, XENSHARE_writable);
227 }
228 /* Skip the RAM region. */
229 pfn = rend_pfn;
230 }
231 BUG_ON(pfn != max_page);
233 subarch_init_memory();
234 }
236 int memory_is_conventional_ram(paddr_t p)
237 {
238 int i;
240 for ( i = 0; i < e820.nr_map; i++ )
241 {
242 if ( (e820.map[i].type == E820_RAM) &&
243 (e820.map[i].addr <= p) &&
244 ((e820.map[i].addr + e820.map[i].size) > p) )
245 return 1;
246 }
248 return 0;
249 }
251 void share_xen_page_with_guest(
252 struct page_info *page, struct domain *d, int readonly)
253 {
254 if ( page_get_owner(page) == d )
255 return;
257 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
259 spin_lock(&d->page_alloc_lock);
261 /* The incremented type count pins as writable or read-only. */
262 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
263 page->u.inuse.type_info |= PGT_validated | 1;
265 page_set_owner(page, d);
266 wmb(); /* install valid domain ptr before updating refcnt. */
267 ASSERT(page->count_info == 0);
268 page->count_info |= PGC_allocated | 1;
270 if ( unlikely(d->xenheap_pages++ == 0) )
271 get_knownalive_domain(d);
272 list_add_tail(&page->list, &d->xenpage_list);
274 spin_unlock(&d->page_alloc_lock);
275 }
277 void share_xen_page_with_privileged_guests(
278 struct page_info *page, int readonly)
279 {
280 share_xen_page_with_guest(page, dom_xen, readonly);
281 }
283 #if defined(CONFIG_X86_PAE)
285 #ifdef NDEBUG
286 /* Only PDPTs above the 4GB boundary need to be shadowed in low memory. */
287 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
288 #else
289 /*
290 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
291 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
292 * (detected by lack of an owning domain). As required for correctness, we
293 * always shadow PDPTs above 4GB.
294 */
295 #define l3tab_needs_shadow(mfn) \
296 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
297 (page_get_owner(mfn_to_page(mfn)) != NULL) && \
298 ((mfn) & 1)) || /* odd MFNs are shadowed */ \
299 ((mfn) >= 0x100000))
300 #endif
302 static l1_pgentry_t *fix_pae_highmem_pl1e;
304 /* Cache the address of PAE high-memory fixmap page tables. */
305 static int __init cache_pae_fixmap_address(void)
306 {
307 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
308 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
309 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
310 return 0;
311 }
312 __initcall(cache_pae_fixmap_address);
314 static DEFINE_PER_CPU(u32, make_cr3_timestamp);
316 void make_cr3(struct vcpu *v, unsigned long mfn)
317 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
318 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
319 {
320 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
321 struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
322 unsigned int cpu = smp_processor_id();
324 /* Fast path: does this mfn need a shadow at all? */
325 if ( !l3tab_needs_shadow(mfn) )
326 {
327 v->arch.cr3 = mfn << PAGE_SHIFT;
328 /* Cache is no longer in use or valid */
329 cache->high_mfn = 0;
330 return;
331 }
333 /* Caching logic is not interrupt safe. */
334 ASSERT(!in_irq());
336 /* Protects against pae_flush_pgd(). */
337 spin_lock(&cache->lock);
339 cache->inuse_idx ^= 1;
340 cache->high_mfn = mfn;
342 /* Map the guest L3 table and copy to the chosen low-memory cache. */
343 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
344 /* First check the previous high mapping can't be in the TLB.
345 * (i.e. have we loaded CR3 since we last did this?) */
346 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
347 local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
348 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
349 lowmem_l3tab = cache->table[cache->inuse_idx];
350 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
351 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
352 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
354 v->arch.cr3 = __pa(lowmem_l3tab);
356 spin_unlock(&cache->lock);
357 }
359 #else /* !CONFIG_X86_PAE */
361 void make_cr3(struct vcpu *v, unsigned long mfn)
362 {
363 v->arch.cr3 = mfn << PAGE_SHIFT;
364 }
366 #endif /* !CONFIG_X86_PAE */
368 void write_ptbase(struct vcpu *v)
369 {
370 write_cr3(v->arch.cr3);
371 }
373 /* Should be called after CR3 is updated.
374 * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
375 *
376 * Also updates other state derived from CR3 (vcpu->arch.guest_vtable,
377 * shadow_vtable, etc).
378 *
379 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
380 * for HVM guests, arch.monitor_table and hvm's guest CR3.
381 *
382 * Update ref counts to shadow tables appropriately.
383 */
384 void update_cr3(struct vcpu *v)
385 {
386 unsigned long cr3_mfn=0;
388 if ( shadow_mode_enabled(v->domain) )
389 {
390 shadow_update_cr3(v);
391 return;
392 }
394 #if CONFIG_PAGING_LEVELS == 4
395 if ( !(v->arch.flags & TF_kernel_mode) )
396 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
397 else
398 #endif
399 cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
401 make_cr3(v, cr3_mfn);
402 }
405 void invalidate_shadow_ldt(struct vcpu *v)
406 {
407 int i;
408 unsigned long pfn;
409 struct page_info *page;
411 if ( v->arch.shadow_ldt_mapcnt == 0 )
412 return;
414 v->arch.shadow_ldt_mapcnt = 0;
416 for ( i = 16; i < 32; i++ )
417 {
418 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
419 if ( pfn == 0 ) continue;
420 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
421 page = mfn_to_page(pfn);
422 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
423 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
424 put_page_and_type(page);
425 }
427 /* Dispose of the (now possibly invalid) mappings from the TLB. */
428 queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
429 }
432 static int alloc_segdesc_page(struct page_info *page)
433 {
434 struct desc_struct *descs;
435 int i;
437 descs = map_domain_page(page_to_mfn(page));
439 for ( i = 0; i < 512; i++ )
440 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
441 goto fail;
443 unmap_domain_page(descs);
444 return 1;
446 fail:
447 unmap_domain_page(descs);
448 return 0;
449 }
452 /* Map shadow page at offset @off. */
453 int map_ldt_shadow_page(unsigned int off)
454 {
455 struct vcpu *v = current;
456 struct domain *d = v->domain;
457 unsigned long gmfn, mfn;
458 l1_pgentry_t l1e, nl1e;
459 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
460 int okay;
462 BUG_ON(unlikely(in_irq()));
464 guest_get_eff_kern_l1e(v, gva, &l1e);
465 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
466 return 0;
468 gmfn = l1e_get_pfn(l1e);
469 mfn = gmfn_to_mfn(d, gmfn);
470 if ( unlikely(!mfn_valid(mfn)) )
471 return 0;
473 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
474 if ( unlikely(!okay) )
475 return 0;
477 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
479 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
480 v->arch.shadow_ldt_mapcnt++;
482 return 1;
483 }
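/*
 * The per-domain slots filled in above back the guest's LDT, which the guest
 * registers with the MMUEXT_SET_LDT extended command (handled further down in
 * do_mmuext_op()).  Guest-side sketch, with hypothetical names, assuming the
 * usual hypercall wrapper:
 *
 *     struct mmuext_op op = {
 *         .cmd              = MMUEXT_SET_LDT,
 *         .arg1.linear_addr = (unsigned long)my_ldt,  // page-aligned VA
 *         .arg2.nr_ents     = nr_ldt_ents,            // at most 8192 entries
 *     };
 *     HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
 */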
486 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
487 {
488 struct page_info *page = mfn_to_page(page_nr);
490 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
491 {
492 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
493 return 0;
494 }
496 return 1;
497 }
500 static int get_page_and_type_from_pagenr(unsigned long page_nr,
501 unsigned long type,
502 struct domain *d)
503 {
504 struct page_info *page = mfn_to_page(page_nr);
506 if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
507 return 0;
509 if ( unlikely(!get_page_type(page, type)) )
510 {
511 put_page(page);
512 return 0;
513 }
515 return 1;
516 }
518 #ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
519 /*
520 * We allow root tables to map each other (a.k.a. linear page tables). It
521 * needs some special care with reference counts and access permissions:
522 * 1. The mapping entry must be read-only, or the guest may get write access
523 * to its own PTEs.
524 * 2. We must only bump the reference counts for an *already validated*
525 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
526 * on a validation that is required to complete that validation.
527 * 3. We only need to increment the reference counts for the mapped page
528 * frame if it is mapped by a different root table. This is sufficient and
529 * also necessary to allow validation of a root table mapping itself.
530 */
531 static int
532 get_linear_pagetable(
533 root_pgentry_t re, unsigned long re_pfn, struct domain *d)
534 {
535 unsigned long x, y;
536 struct page_info *page;
537 unsigned long pfn;
539 if ( (root_get_flags(re) & _PAGE_RW) )
540 {
541 MEM_LOG("Attempt to create linear p.t. with write perms");
542 return 0;
543 }
545 if ( (pfn = root_get_pfn(re)) != re_pfn )
546 {
547 /* Make sure the mapped frame belongs to the correct domain. */
548 if ( unlikely(!get_page_from_pagenr(pfn, d)) )
549 return 0;
551 /*
552 * Make sure that the mapped frame is an already-validated L2 table.
553 * If so, atomically increment the count (checking for overflow).
554 */
555 page = mfn_to_page(pfn);
556 y = page->u.inuse.type_info;
557 do {
558 x = y;
559 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
560 unlikely((x & (PGT_type_mask|PGT_validated)) !=
561 (PGT_root_page_table|PGT_validated)) )
562 {
563 put_page(page);
564 return 0;
565 }
566 }
567 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );
568 }
570 return 1;
571 }
572 #endif /* !CONFIG_X86_PAE */
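/*
 * In terms of the guest interface, a linear mapping is created simply by
 * pointing one slot of the root table back at the root table itself with a
 * read-only entry (rule 1 above).  Guest-side sketch for a non-PAE guest,
 * with LINEAR_SLOT standing for whichever slot the guest reserves:
 *
 *     struct mmu_update req = {
 *         .ptr = ((uint64_t)root_mfn << PAGE_SHIFT)
 *                + LINEAR_SLOT * sizeof(root_pgentry_t)
 *                + MMU_NORMAL_PT_UPDATE,
 *         .val = ((uint64_t)root_mfn << PAGE_SHIFT)
 *                | _PAGE_PRESENT,                 // note: no _PAGE_RW
 *     };
 *     HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
 */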
574 int
575 get_page_from_l1e(
576 l1_pgentry_t l1e, struct domain *d)
577 {
578 unsigned long mfn = l1e_get_pfn(l1e);
579 struct page_info *page = mfn_to_page(mfn);
580 int okay;
582 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
583 return 1;
585 if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
586 {
587 MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
588 return 0;
589 }
591 if ( unlikely(!mfn_valid(mfn)) ||
592 unlikely(page_get_owner(page) == dom_io) )
593 {
594 /* DOMID_IO reverts to caller for privilege checks. */
595 if ( d == dom_io )
596 d = current->domain;
598 if ( !iomem_access_permitted(d, mfn, mfn) )
599 {
600 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
601 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
602 d->domain_id, mfn);
603 return 0;
604 }
606 /* No reference counting for out-of-range I/O pages. */
607 if ( !mfn_valid(mfn) )
608 return 1;
610 d = dom_io;
611 }
613 /* Foreign mappings into guests in shadow external mode don't
614 * contribute to writeable mapping refcounts. (This allows the
615 * qemu-dm helper process in dom0 to map the domain's memory without
616 * messing up the count of "real" writable mappings.) */
617 okay = (((l1e_get_flags(l1e) & _PAGE_RW) &&
618 !(unlikely(shadow_mode_external(d) && (d != current->domain))))
619 ? get_page_and_type(page, d, PGT_writable_page)
620 : get_page(page, d));
621 if ( !okay )
622 {
623 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
624 " for dom%d",
625 mfn, get_gpfn_from_mfn(mfn),
626 l1e_get_intpte(l1e), d->domain_id);
627 }
629 return okay;
630 }
633 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
634 static int
635 get_page_from_l2e(
636 l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
637 {
638 int rc;
640 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
641 return 1;
643 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
644 {
645 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
646 return 0;
647 }
649 rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
650 #if CONFIG_PAGING_LEVELS == 2
651 if ( unlikely(!rc) )
652 rc = get_linear_pagetable(l2e, pfn, d);
653 #endif
654 return rc;
655 }
658 #if CONFIG_PAGING_LEVELS >= 3
659 static int
660 get_page_from_l3e(
661 l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
662 {
663 int rc;
665 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
666 return 1;
668 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
669 {
670 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
671 return 0;
672 }
674 rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
675 return rc;
676 }
677 #endif /* 3 level */
679 #if CONFIG_PAGING_LEVELS >= 4
680 static int
681 get_page_from_l4e(
682 l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
683 {
684 int rc;
686 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
687 return 1;
689 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
690 {
691 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
692 return 0;
693 }
695 rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
697 if ( unlikely(!rc) )
698 rc = get_linear_pagetable(l4e, pfn, d);
700 return rc;
701 }
702 #endif /* 4 level */
704 #ifdef __x86_64__
706 #ifdef USER_MAPPINGS_ARE_GLOBAL
707 #define adjust_guest_l1e(pl1e, d) \
708 do { \
709 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
710 likely(!IS_COMPAT(d)) ) \
711 { \
712 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
713 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
714 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
715 MEM_LOG("Global bit is set to kernel page %lx", \
716 l1e_get_pfn((pl1e))); \
717 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
718 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
719 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
720 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
721 } \
722 } while ( 0 )
723 #else
724 #define adjust_guest_l1e(pl1e, d) \
725 do { \
726 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
727 likely(!IS_COMPAT(d)) ) \
728 l1e_add_flags((pl1e), _PAGE_USER); \
729 } while ( 0 )
730 #endif
732 #define adjust_guest_l2e(pl2e, d) \
733 do { \
734 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
735 likely(!IS_COMPAT(d)) ) \
736 l2e_add_flags((pl2e), _PAGE_USER); \
737 } while ( 0 )
739 #define adjust_guest_l3e(pl3e, d) \
740 do { \
741 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
742 l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \
743 _PAGE_USER : \
744 _PAGE_USER|_PAGE_RW); \
745 } while ( 0 )
747 #define adjust_guest_l4e(pl4e, d) \
748 do { \
749 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
750 likely(!IS_COMPAT(d)) ) \
751 l4e_add_flags((pl4e), _PAGE_USER); \
752 } while ( 0 )
754 #else /* !defined(__x86_64__) */
756 #define adjust_guest_l1e(_p, _d) ((void)(_d))
757 #define adjust_guest_l2e(_p, _d) ((void)(_d))
758 #define adjust_guest_l3e(_p, _d) ((void)(_d))
760 #endif
762 #ifdef CONFIG_COMPAT
763 #define unadjust_guest_l3e(pl3e, d) \
764 do { \
765 if ( unlikely(IS_COMPAT(d)) && \
766 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
767 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
768 } while ( 0 )
769 #else
770 #define unadjust_guest_l3e(_p, _d) ((void)(_d))
771 #endif
773 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
774 {
775 unsigned long pfn = l1e_get_pfn(l1e);
776 struct page_info *page = mfn_to_page(pfn);
777 struct domain *e;
778 struct vcpu *v;
780 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) )
781 return;
783 e = page_get_owner(page);
785 /*
786 * Check if this is a mapping that was established via a grant reference.
787 * If it was then we should not be here: we require that such mappings are
788 * explicitly destroyed via the grant-table interface.
789 *
790 * The upshot of this is that the guest can end up with active grants that
791 * it cannot destroy (because it no longer has a PTE to present to the
792 * grant-table interface). This can lead to subtle hard-to-catch bugs,
793 * hence a special grant PTE flag can be enabled to catch the bug early.
794 *
795 * (Note that the undestroyable active grants are not a security hole in
796 * Xen. All active grants can safely be cleaned up when the domain dies.)
797 */
798 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
799 !(d->domain_flags & (DOMF_shutdown|DOMF_dying)) )
800 {
801 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
802 l1e_get_intpte(l1e));
803 domain_crash(d);
804 }
806 /* Remember we didn't take a type-count of foreign writable mappings
807 * to shadow external domains */
808 if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
809 !(unlikely((e != d) && shadow_mode_external(e))) )
810 {
811 put_page_and_type(page);
812 }
813 else
814 {
815 /* We expect this is rare so we blow the entire shadow LDT. */
816 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
817 PGT_ldt_page)) &&
818 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
819 (d == e) )
820 {
821 for_each_vcpu ( d, v )
822 invalidate_shadow_ldt(v);
823 }
824 put_page(page);
825 }
826 }
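/*
 * For reference, the explicit teardown path that the comment above requires
 * looks like this from the guest's side (sketch only, assuming the usual
 * HYPERVISOR_grant_table_op() wrapper and a 'handle' returned by an earlier
 * GNTTABOP_map_grant_ref):
 *
 *     struct gnttab_unmap_grant_ref unmap = {
 *         .host_addr    = mapped_va,     // where the grant was mapped
 *         .dev_bus_addr = 0,
 *         .handle       = handle,
 *     };
 *     HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
 */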
829 /*
830 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
831 * Note also that this automatically deals correctly with linear p.t.'s.
832 */
833 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
834 {
835 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
836 (l2e_get_pfn(l2e) != pfn) )
837 put_page_and_type(l2e_get_page(l2e));
838 }
841 #if CONFIG_PAGING_LEVELS >= 3
842 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
843 {
844 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
845 (l3e_get_pfn(l3e) != pfn) )
846 put_page_and_type(l3e_get_page(l3e));
847 }
848 #endif
850 #if CONFIG_PAGING_LEVELS >= 4
851 static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
852 {
853 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
854 (l4e_get_pfn(l4e) != pfn) )
855 put_page_and_type(l4e_get_page(l4e));
856 }
857 #endif
859 static int alloc_l1_table(struct page_info *page)
860 {
861 struct domain *d = page_get_owner(page);
862 unsigned long pfn = page_to_mfn(page);
863 l1_pgentry_t *pl1e;
864 int i;
866 pl1e = map_domain_page(pfn);
868 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
869 {
870 if ( is_guest_l1_slot(i) &&
871 unlikely(!get_page_from_l1e(pl1e[i], d)) )
872 goto fail;
874 adjust_guest_l1e(pl1e[i], d);
875 }
877 unmap_domain_page(pl1e);
878 return 1;
880 fail:
881 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
882 while ( i-- > 0 )
883 if ( is_guest_l1_slot(i) )
884 put_page_from_l1e(pl1e[i], d);
886 unmap_domain_page(pl1e);
887 return 0;
888 }
890 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
891 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
892 {
893 struct page_info *page;
894 l2_pgentry_t *pl2e;
895 l3_pgentry_t l3e3;
896 #ifndef CONFIG_COMPAT
897 l2_pgentry_t l2e;
898 int i;
899 #else
901 if ( !IS_COMPAT(d) )
902 return 1;
903 #endif
905 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
907 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
908 l3e3 = pl3e[3];
909 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
910 {
911 MEM_LOG("PAE L3 3rd slot is empty");
912 return 0;
913 }
915 /*
916 * The Xen-private mappings include linear mappings. The L2 thus cannot
917 * be shared by multiple L3 tables. The test here is adequate because:
918 * 1. Cannot appear in slots != 3 because get_page_type() checks the
919 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
920 * 2. Cannot appear in another page table's L3:
921 * a. alloc_l3_table() calls this function and this check will fail
922 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
923 */
924 page = l3e_get_page(l3e3);
925 BUG_ON(page->u.inuse.type_info & PGT_pinned);
926 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
927 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
928 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
929 {
930 MEM_LOG("PAE L3 3rd slot is shared");
931 return 0;
932 }
934 /* Xen private mappings. */
935 pl2e = map_domain_page(l3e_get_pfn(l3e3));
936 #ifndef CONFIG_COMPAT
937 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
938 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
939 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
940 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
941 {
942 l2e = l2e_from_page(
943 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
944 __PAGE_HYPERVISOR);
945 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e);
946 }
947 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
948 {
949 l2e = l2e_empty();
950 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT )
951 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
952 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
953 }
954 #else
955 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
956 &compat_idle_pg_table_l2[
957 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
958 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
959 #endif
960 unmap_domain_page(pl2e);
962 return 1;
963 }
964 #else
965 # define create_pae_xen_mappings(d, pl3e) (1)
966 #endif
968 #ifdef CONFIG_X86_PAE
969 /* Flush a pgdir update into low-memory caches. */
970 static void pae_flush_pgd(
971 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
972 {
973 struct domain *d = page_get_owner(mfn_to_page(mfn));
974 struct vcpu *v;
975 intpte_t _ol3e, _nl3e, _pl3e;
976 l3_pgentry_t *l3tab_ptr;
977 struct pae_l3_cache *cache;
979 /* If below 4GB then the pgdir is not shadowed in low memory. */
980 if ( !l3tab_needs_shadow(mfn) )
981 return;
983 for_each_vcpu ( d, v )
984 {
985 cache = &v->arch.pae_l3_cache;
987 spin_lock(&cache->lock);
989 if ( cache->high_mfn == mfn )
990 {
991 l3tab_ptr = &cache->table[cache->inuse_idx][idx];
992 _ol3e = l3e_get_intpte(*l3tab_ptr);
993 _nl3e = l3e_get_intpte(nl3e);
994 _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
995 BUG_ON(_pl3e != _ol3e);
996 }
998 spin_unlock(&cache->lock);
999 }
1001 flush_tlb_mask(d->domain_dirty_cpumask);
1002 }
1003 #else
1004 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
1005 #endif
1007 static int alloc_l2_table(struct page_info *page, unsigned long type)
1009 struct domain *d = page_get_owner(page);
1010 unsigned long pfn = page_to_mfn(page);
1011 l2_pgentry_t *pl2e;
1012 int i;
1014 pl2e = map_domain_page(pfn);
1016 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1018 if ( is_guest_l2_slot(d, type, i) &&
1019 unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
1020 goto fail;
1022 adjust_guest_l2e(pl2e[i], d);
1025 #if CONFIG_PAGING_LEVELS == 2
1026 /* Xen private mappings. */
1027 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1028 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1029 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1030 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1031 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
1032 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1033 pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1034 l2e_from_page(
1035 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
1036 __PAGE_HYPERVISOR);
1037 #endif
1039 unmap_domain_page(pl2e);
1040 return 1;
1042 fail:
1043 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
1044 while ( i-- > 0 )
1045 if ( is_guest_l2_slot(d, type, i) )
1046 put_page_from_l2e(pl2e[i], pfn);
1048 unmap_domain_page(pl2e);
1049 return 0;
1053 #if CONFIG_PAGING_LEVELS >= 3
1054 static int alloc_l3_table(struct page_info *page)
1056 struct domain *d = page_get_owner(page);
1057 unsigned long pfn = page_to_mfn(page);
1058 l3_pgentry_t *pl3e;
1059 int i;
1061 #ifdef CONFIG_X86_PAE
1062 /*
1063 * PAE pgdirs above 4GB are unacceptable if the guest does not understand
1064 * the weird 'extended cr3' format for dealing with high-order address
1065 * bits. We cut some slack for control tools (before vcpu0 is initialised).
1066 */
1067 if ( (pfn >= 0x100000) &&
1068 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
1069 d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
1071 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
1072 return 0;
1074 #endif
1076 pl3e = map_domain_page(pfn);
1078 /*
1079 * PAE guests allocate full pages, but aren't required to initialize
1080 * more than the first four entries; when running in compatibility
1081 * mode, however, the full page is visible to the MMU, and hence all
1082 * 512 entries must be valid/verified, which is most easily achieved
1083 * by clearing them out.
1084 */
1085 if ( IS_COMPAT(d) )
1086 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
1088 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1090 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1091 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
1093 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
1094 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
1095 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
1096 PGT_l2_page_table |
1097 PGT_pae_xen_l2,
1098 d) )
1099 goto fail;
1101 else
1102 #endif
1103 if ( is_guest_l3_slot(i) &&
1104 unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
1105 goto fail;
1107 adjust_guest_l3e(pl3e[i], d);
1110 if ( !create_pae_xen_mappings(d, pl3e) )
1111 goto fail;
1113 unmap_domain_page(pl3e);
1114 return 1;
1116 fail:
1117 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
1118 while ( i-- > 0 )
1119 if ( is_guest_l3_slot(i) )
1120 put_page_from_l3e(pl3e[i], pfn);
1122 unmap_domain_page(pl3e);
1123 return 0;
1125 #else
1126 #define alloc_l3_table(page) (0)
1127 #endif
1129 #if CONFIG_PAGING_LEVELS >= 4
1130 static int alloc_l4_table(struct page_info *page)
1132 struct domain *d = page_get_owner(page);
1133 unsigned long pfn = page_to_mfn(page);
1134 l4_pgentry_t *pl4e = page_to_virt(page);
1135 int i;
1137 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1139 if ( is_guest_l4_slot(i) &&
1140 unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
1141 goto fail;
1143 adjust_guest_l4e(pl4e[i], d);
1146 /* Xen private mappings. */
1147 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1148 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1149 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1150 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
1151 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
1152 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
1153 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
1154 __PAGE_HYPERVISOR);
1155 if ( IS_COMPAT(d) )
1156 pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
1157 l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
1158 __PAGE_HYPERVISOR);
1160 return 1;
1162 fail:
1163 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
1164 while ( i-- > 0 )
1165 if ( is_guest_l4_slot(i) )
1166 put_page_from_l4e(pl4e[i], pfn);
1168 return 0;
1170 #else
1171 #define alloc_l4_table(page) (0)
1172 #endif
1175 static void free_l1_table(struct page_info *page)
1176 {
1177 struct domain *d = page_get_owner(page);
1178 unsigned long pfn = page_to_mfn(page);
1179 l1_pgentry_t *pl1e;
1180 int i;
1182 pl1e = map_domain_page(pfn);
1184 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1185 if ( is_guest_l1_slot(i) )
1186 put_page_from_l1e(pl1e[i], d);
1188 unmap_domain_page(pl1e);
1189 }
1192 static void free_l2_table(struct page_info *page)
1193 {
1194 #ifdef CONFIG_COMPAT
1195 struct domain *d = page_get_owner(page);
1196 #endif
1197 unsigned long pfn = page_to_mfn(page);
1198 l2_pgentry_t *pl2e;
1199 int i;
1201 pl2e = map_domain_page(pfn);
1203 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1204 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
1205 put_page_from_l2e(pl2e[i], pfn);
1207 unmap_domain_page(pl2e);
1209 page->u.inuse.type_info &= ~PGT_pae_xen_l2;
1210 }
1213 #if CONFIG_PAGING_LEVELS >= 3
1215 static void free_l3_table(struct page_info *page)
1216 {
1217 struct domain *d = page_get_owner(page);
1218 unsigned long pfn = page_to_mfn(page);
1219 l3_pgentry_t *pl3e;
1220 int i;
1222 pl3e = map_domain_page(pfn);
1224 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1225 if ( is_guest_l3_slot(i) )
1226 {
1227 put_page_from_l3e(pl3e[i], pfn);
1228 unadjust_guest_l3e(pl3e[i], d);
1229 }
1231 unmap_domain_page(pl3e);
1232 }
1234 #endif
1236 #if CONFIG_PAGING_LEVELS >= 4
1238 static void free_l4_table(struct page_info *page)
1239 {
1240 unsigned long pfn = page_to_mfn(page);
1241 l4_pgentry_t *pl4e = page_to_virt(page);
1242 int i;
1244 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1245 if ( is_guest_l4_slot(i) )
1246 put_page_from_l4e(pl4e[i], pfn);
1247 }
1249 #endif
1252 /* How to write an entry to the guest pagetables.
1253 * Returns 0 for failure (pointer not valid), 1 for success. */
1254 static inline int update_intpte(intpte_t *p,
1255 intpte_t old,
1256 intpte_t new,
1257 unsigned long mfn,
1258 struct vcpu *v)
1260 int rv = 1;
1261 #ifndef PTE_UPDATE_WITH_CMPXCHG
1262 if ( unlikely(shadow_mode_enabled(v->domain)) )
1263 rv = shadow_write_guest_entry(v, p, new, _mfn(mfn));
1264 else
1265 rv = (!__copy_to_user(p, &new, sizeof(new)));
1266 #else
1268 intpte_t t = old;
1269 for ( ; ; )
1271 if ( unlikely(shadow_mode_enabled(v->domain)) )
1272 rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
1273 else
1274 rv = (!cmpxchg_user(p, t, new));
1276 if ( unlikely(rv == 0) )
1278 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
1279 ": saw %" PRIpte, old, new, t);
1280 break;
1283 if ( t == old )
1284 break;
1286 /* Allowed to change in Accessed/Dirty flags only. */
1287 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
1289 old = t;
1292 #endif
1293 return rv;
1296 /* Macro that wraps the appropriate type-changes around update_intpte().
1297 * Arguments are: type, ptr, old, new, mfn, vcpu */
1298 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \
1299 update_intpte((intpte_t *)(_p), \
1300 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
1301 (_m), (_v))
1303 /* Update the L1 entry at pl1e to new value nl1e. */
1304 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
1305 unsigned long gl1mfn)
1307 l1_pgentry_t ol1e;
1308 struct domain *d = current->domain;
1310 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
1311 return 0;
1313 if ( unlikely(shadow_mode_refcounts(d)) )
1314 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1316 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
1318 /* Translate foreign guest addresses. */
1319 nl1e = l1e_from_pfn(gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e)),
1320 l1e_get_flags(nl1e));
1322 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
1324 MEM_LOG("Bad L1 flags %x",
1325 l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
1326 return 0;
1329 adjust_guest_l1e(nl1e, d);
1331 /* Fast path for identical mapping, r/w and presence. */
1332 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
1333 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1335 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
1336 return 0;
1338 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1340 put_page_from_l1e(nl1e, d);
1341 return 0;
1344 else
1346 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1347 return 0;
1350 put_page_from_l1e(ol1e, d);
1351 return 1;
1355 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
1356 static int mod_l2_entry(l2_pgentry_t *pl2e,
1357 l2_pgentry_t nl2e,
1358 unsigned long pfn,
1359 unsigned long type)
1361 l2_pgentry_t ol2e;
1362 struct domain *d = current->domain;
1364 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
1366 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
1367 return 0;
1370 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
1371 return 0;
1373 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
1375 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
1377 MEM_LOG("Bad L2 flags %x",
1378 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
1379 return 0;
1382 adjust_guest_l2e(nl2e, d);
1384 /* Fast path for identical mapping and presence. */
1385 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
1386 return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
1388 if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
1389 return 0;
1391 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1393 put_page_from_l2e(nl2e, pfn);
1394 return 0;
1397 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1399 return 0;
1402 put_page_from_l2e(ol2e, pfn);
1403 return 1;
1406 #if CONFIG_PAGING_LEVELS >= 3
1408 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
1409 static int mod_l3_entry(l3_pgentry_t *pl3e,
1410 l3_pgentry_t nl3e,
1411 unsigned long pfn)
1413 l3_pgentry_t ol3e;
1414 struct domain *d = current->domain;
1415 int okay;
1417 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
1419 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
1420 return 0;
1423 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1424 /*
1425 * Disallow updates to final L3 slot. It contains Xen mappings, and it
1426 * would be a pain to ensure they remain continuously valid throughout.
1427 */
1428 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
1429 pgentry_ptr_to_slot(pl3e) >= 3 )
1430 return 0;
1431 #endif
1433 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
1434 return 0;
1436 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
1438 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
1440 MEM_LOG("Bad L3 flags %x",
1441 l3e_get_flags(nl3e) & l3_disallow_mask(d));
1442 return 0;
1445 adjust_guest_l3e(nl3e, d);
1447 /* Fast path for identical mapping and presence. */
1448 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
1449 return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
1451 if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
1452 return 0;
1454 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1456 put_page_from_l3e(nl3e, pfn);
1457 return 0;
1460 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1462 return 0;
1465 okay = create_pae_xen_mappings(d, pl3e);
1466 BUG_ON(!okay);
1468 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
1470 put_page_from_l3e(ol3e, pfn);
1471 return 1;
1474 #endif
1476 #if CONFIG_PAGING_LEVELS >= 4
1478 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
1479 static int mod_l4_entry(l4_pgentry_t *pl4e,
1480 l4_pgentry_t nl4e,
1481 unsigned long pfn)
1483 l4_pgentry_t ol4e;
1485 if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) )
1487 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
1488 return 0;
1491 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
1492 return 0;
1494 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
1496 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
1498 MEM_LOG("Bad L4 flags %x",
1499 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
1500 return 0;
1503 adjust_guest_l4e(nl4e, current->domain);
1505 /* Fast path for identical mapping and presence. */
1506 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
1507 return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
1509 if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
1510 return 0;
1512 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1514 put_page_from_l4e(nl4e, pfn);
1515 return 0;
1518 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1520 return 0;
1523 put_page_from_l4e(ol4e, pfn);
1524 return 1;
1527 #endif
1529 int alloc_page_type(struct page_info *page, unsigned long type)
1531 struct domain *owner = page_get_owner(page);
1533 /* A page table is dirtied when its type count becomes non-zero. */
1534 if ( likely(owner != NULL) )
1535 mark_dirty(owner, page_to_mfn(page));
1537 switch ( type & PGT_type_mask )
1539 case PGT_l1_page_table:
1540 return alloc_l1_table(page);
1541 case PGT_l2_page_table:
1542 return alloc_l2_table(page, type);
1543 case PGT_l3_page_table:
1544 return alloc_l3_table(page);
1545 case PGT_l4_page_table:
1546 return alloc_l4_table(page);
1547 case PGT_gdt_page:
1548 case PGT_ldt_page:
1549 return alloc_segdesc_page(page);
1550 default:
1551 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
1552 type, page->u.inuse.type_info,
1553 page->count_info);
1554 BUG();
1557 return 0;
1561 void free_page_type(struct page_info *page, unsigned long type)
1563 struct domain *owner = page_get_owner(page);
1564 unsigned long gmfn;
1566 if ( likely(owner != NULL) )
1568 /*
1569 * We have to flush before the next use of the linear mapping
1570 * (e.g., update_va_mapping()) or we could end up modifying a page
1571 * that is no longer a page table (and hence screw up ref counts).
1572 */
1573 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
1575 if ( unlikely(shadow_mode_enabled(owner)) )
1577 /* A page table is dirtied when its type count becomes zero. */
1578 mark_dirty(owner, page_to_mfn(page));
1580 if ( shadow_mode_refcounts(owner) )
1581 return;
1583 gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
1584 ASSERT(VALID_M2P(gmfn));
1585 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
1589 switch ( type & PGT_type_mask )
1591 case PGT_l1_page_table:
1592 free_l1_table(page);
1593 break;
1595 case PGT_l2_page_table:
1596 free_l2_table(page);
1597 break;
1599 #if CONFIG_PAGING_LEVELS >= 3
1600 case PGT_l3_page_table:
1601 free_l3_table(page);
1602 break;
1603 #endif
1605 #if CONFIG_PAGING_LEVELS >= 4
1606 case PGT_l4_page_table:
1607 free_l4_table(page);
1608 break;
1609 #endif
1611 default:
1612 printk("%s: type %lx pfn %lx\n",__FUNCTION__,
1613 type, page_to_mfn(page));
1614 BUG();
1619 void put_page_type(struct page_info *page)
1621 unsigned long nx, x, y = page->u.inuse.type_info;
1623 again:
1624 do {
1625 x = y;
1626 nx = x - 1;
1628 ASSERT((x & PGT_count_mask) != 0);
1630 if ( unlikely((nx & PGT_count_mask) == 0) )
1632 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1633 likely(nx & PGT_validated) )
1635 /*
1636 * Page-table pages must be unvalidated when count is zero. The
1637 * 'free' is safe because the refcnt is non-zero and validated
1638 * bit is clear => other ops will spin or fail.
1639 */
1640 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1641 x & ~PGT_validated)) != x) )
1642 goto again;
1643 /* We cleared the 'valid bit' so we do the cleanup. */
1644 free_page_type(page, x);
1645 /* Carry on, but with the 'valid bit' now clear. */
1646 x &= ~PGT_validated;
1647 nx &= ~PGT_validated;
1650 /*
1651 * Record TLB information for flush later. We do not stamp page
1652 * tables when running in shadow mode:
1653 * 1. Pointless, since it's the shadow pt's which must be tracked.
1654 * 2. Shadow mode reuses this field for shadowed page tables to
1655 * store flags info -- we don't want to conflict with that.
1656 */
1657 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
1658 (page->count_info & PGC_page_table)) )
1659 page->tlbflush_timestamp = tlbflush_current_time();
1662 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1666 int get_page_type(struct page_info *page, unsigned long type)
1668 unsigned long nx, x, y = page->u.inuse.type_info;
1670 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
1672 again:
1673 do {
1674 x = y;
1675 nx = x + 1;
1676 if ( unlikely((nx & PGT_count_mask) == 0) )
1678 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1679 return 0;
1681 else if ( unlikely((x & PGT_count_mask) == 0) )
1683 struct domain *d = page_get_owner(page);
1685 /* Never allow a shadowed frame to go from type count 0 to 1 */
1686 if ( d && shadow_mode_enabled(d) )
1687 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
1689 ASSERT(!(x & PGT_pae_xen_l2));
1690 if ( (x & PGT_type_mask) != type )
1692 /*
1693 * On a type change we check whether we must flush stale TLB entries. This
1694 * may be unnecessary (e.g., page was GDT/LDT) but those
1695 * circumstances should be very rare.
1696 */
1697 cpumask_t mask = d->domain_dirty_cpumask;
1699 /* Don't flush if the timestamp is old enough */
1700 tlbflush_filter(mask, page->tlbflush_timestamp);
1702 if ( unlikely(!cpus_empty(mask)) &&
1703 /* Shadow mode: track only writable pages. */
1704 (!shadow_mode_enabled(page_get_owner(page)) ||
1705 ((nx & PGT_type_mask) == PGT_writable_page)) )
1707 perfc_incrc(need_flush_tlb_flush);
1708 flush_tlb_mask(mask);
1711 /* We lose existing type, back pointer, and validity. */
1712 nx &= ~(PGT_type_mask | PGT_validated);
1713 nx |= type;
1715 /* No special validation needed for writable pages. */
1716 /* Page tables and GDT/LDT need to be scanned for validity. */
1717 if ( type == PGT_writable_page )
1718 nx |= PGT_validated;
1721 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
1723 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1724 (type != PGT_l1_page_table) )
1725 MEM_LOG("Bad type (saw %" PRtype_info
1726 " != exp %" PRtype_info ") "
1727 "for mfn %lx (pfn %lx)",
1728 x, type, page_to_mfn(page),
1729 get_gpfn_from_mfn(page_to_mfn(page)));
1730 return 0;
1732 else if ( unlikely(!(x & PGT_validated)) )
1734 /* Someone else is updating validation of this page. Wait... */
1735 while ( (y = page->u.inuse.type_info) == x )
1736 cpu_relax();
1737 goto again;
1740 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1742 if ( unlikely(!(nx & PGT_validated)) )
1744 /* Try to validate page type; drop the new reference on failure. */
1745 if ( unlikely(!alloc_page_type(page, type)) )
1747 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
1748 PRtype_info ": caf=%08x taf=%" PRtype_info,
1749 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1750 type, page->count_info, page->u.inuse.type_info);
1751 /* No one else can get a reference. We hold the only ref. */
1752 page->u.inuse.type_info = 0;
1753 return 0;
1756 /* No one else is updating simultaneously. */
1757 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1760 return 1;
1764 int new_guest_cr3(unsigned long mfn)
1766 struct vcpu *v = current;
1767 struct domain *d = v->domain;
1768 int okay;
1769 unsigned long old_base_mfn;
1771 #ifdef CONFIG_COMPAT
1772 if ( IS_COMPAT(d) )
1774 okay = shadow_mode_refcounts(d)
1775 ? 0 /* Old code was broken, but what should it be? */
1776 : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)),
1777 l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW|
1778 _PAGE_USER|_PAGE_ACCESSED)), 0);
1779 if ( unlikely(!okay) )
1781 MEM_LOG("Error while installing new compat baseptr %lx", mfn);
1782 return 0;
1785 invalidate_shadow_ldt(v);
1786 write_ptbase(v);
1788 return 1;
1790 #endif
1791 okay = shadow_mode_refcounts(d)
1792 ? get_page_from_pagenr(mfn, d)
1793 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
1794 if ( unlikely(!okay) )
1796 MEM_LOG("Error while installing new baseptr %lx", mfn);
1797 return 0;
1800 invalidate_shadow_ldt(v);
1802 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1804 v->arch.guest_table = pagetable_from_pfn(mfn);
1805 update_cr3(v);
1807 write_ptbase(v);
1809 if ( likely(old_base_mfn != 0) )
1811 if ( shadow_mode_refcounts(d) )
1812 put_page(mfn_to_page(old_base_mfn));
1813 else
1814 put_page_and_type(mfn_to_page(old_base_mfn));
1817 return 1;
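/*
 * From the guest's point of view this path is reached via the
 * MMUEXT_NEW_BASEPTR extended command (and MMUEXT_NEW_USER_BASEPTR for the
 * user page table of a 64-bit guest).  Guest-side sketch, assuming the usual
 * hypercall wrapper:
 *
 *     struct mmuext_op op = {
 *         .cmd      = MMUEXT_NEW_BASEPTR,
 *         .arg1.mfn = new_top_level_mfn,   // a validated root-table frame
 *     };
 *     HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
 */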
1820 static void process_deferred_ops(void)
1822 unsigned int deferred_ops;
1823 struct domain *d = current->domain;
1824 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1826 deferred_ops = info->deferred_ops;
1827 info->deferred_ops = 0;
1829 if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
1831 if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
1832 flush_tlb_mask(d->domain_dirty_cpumask);
1833 else
1834 local_flush_tlb();
1837 if ( deferred_ops & DOP_RELOAD_LDT )
1838 (void)map_ldt_shadow_page(0);
1840 if ( unlikely(info->foreign != NULL) )
1842 put_domain(info->foreign);
1843 info->foreign = NULL;
1847 static int set_foreigndom(domid_t domid)
1849 struct domain *e, *d = current->domain;
1850 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1851 int okay = 1;
1853 ASSERT(info->foreign == NULL);
1855 if ( likely(domid == DOMID_SELF) )
1856 goto out;
1858 if ( unlikely(domid == d->domain_id) )
1860 MEM_LOG("Dom %u tried to specify itself as foreign domain",
1861 d->domain_id);
1862 okay = 0;
1864 else if ( unlikely(shadow_mode_translate(d)) )
1866 MEM_LOG("Cannot mix foreign mappings with translated domains");
1867 okay = 0;
1869 else if ( !IS_PRIV(d) )
1871 switch ( domid )
1873 case DOMID_IO:
1874 get_knownalive_domain(dom_io);
1875 info->foreign = dom_io;
1876 break;
1877 default:
1878 MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
1879 okay = 0;
1880 break;
1883 else
1885 info->foreign = e = find_domain_by_id(domid);
1886 if ( e == NULL )
1888 switch ( domid )
1890 case DOMID_XEN:
1891 get_knownalive_domain(dom_xen);
1892 info->foreign = dom_xen;
1893 break;
1894 case DOMID_IO:
1895 get_knownalive_domain(dom_io);
1896 info->foreign = dom_io;
1897 break;
1898 default:
1899 MEM_LOG("Unknown domain '%u'", domid);
1900 okay = 0;
1901 break;
1906 out:
1907 return okay;
1910 static inline cpumask_t vcpumask_to_pcpumask(
1911 struct domain *d, unsigned long vmask)
1913 unsigned int vcpu_id;
1914 cpumask_t pmask = CPU_MASK_NONE;
1915 struct vcpu *v;
1917 while ( vmask != 0 )
1919 vcpu_id = find_first_set_bit(vmask);
1920 vmask &= ~(1UL << vcpu_id);
1921 if ( (vcpu_id < MAX_VIRT_CPUS) &&
1922 ((v = d->vcpu[vcpu_id]) != NULL) )
1923 cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
1926 return pmask;
1929 int do_mmuext_op(
1930 XEN_GUEST_HANDLE(mmuext_op_t) uops,
1931 unsigned int count,
1932 XEN_GUEST_HANDLE(uint) pdone,
1933 unsigned int foreigndom)
1935 struct mmuext_op op;
1936 int rc = 0, i = 0, okay;
1937 unsigned long mfn = 0, gmfn = 0, type;
1938 unsigned int done = 0;
1939 struct page_info *page;
1940 struct vcpu *v = current;
1941 struct domain *d = v->domain;
1943 LOCK_BIGLOCK(d);
1945 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1947 count &= ~MMU_UPDATE_PREEMPTED;
1948 if ( unlikely(!guest_handle_is_null(pdone)) )
1949 (void)copy_from_guest(&done, pdone, 1);
1952 if ( !set_foreigndom(foreigndom) )
1954 rc = -ESRCH;
1955 goto out;
1958 if ( unlikely(!guest_handle_okay(uops, count)) )
1960 rc = -EFAULT;
1961 goto out;
1964 for ( i = 0; i < count; i++ )
1966 if ( hypercall_preempt_check() )
1968 rc = hypercall_create_continuation(
1969 __HYPERVISOR_mmuext_op, "hihi",
1970 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
1971 break;
1974 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
1976 MEM_LOG("Bad __copy_from_guest");
1977 rc = -EFAULT;
1978 break;
1981 okay = 1;
1982 gmfn = op.arg1.mfn;
1983 mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
1984 page = mfn_to_page(mfn);
1986 switch ( op.cmd )
1988 case MMUEXT_PIN_L1_TABLE:
1989 type = PGT_l1_page_table;
1990 goto pin_page;
1992 case MMUEXT_PIN_L2_TABLE:
1993 type = PGT_l2_page_table;
1994 goto pin_page;
1996 case MMUEXT_PIN_L3_TABLE:
1997 type = PGT_l3_page_table;
1998 goto pin_page;
2000 case MMUEXT_PIN_L4_TABLE:
2001 if ( IS_COMPAT(FOREIGNDOM) )
2002 break;
2003 type = PGT_l4_page_table;
2005 pin_page:
2006 /* Ignore pinning of invalid paging levels. */
2007 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
2008 break;
2010 if ( shadow_mode_refcounts(FOREIGNDOM) )
2011 break;
2013 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
2014 if ( unlikely(!okay) )
2016 MEM_LOG("Error while pinning mfn %lx", mfn);
2017 break;
2020 if ( unlikely(test_and_set_bit(_PGT_pinned,
2021 &page->u.inuse.type_info)) )
2023 MEM_LOG("Mfn %lx already pinned", mfn);
2024 put_page_and_type(page);
2025 okay = 0;
2026 break;
2029 /* A page is dirtied when its pin status is set. */
2030 mark_dirty(d, mfn);
2032 break;
2034 case MMUEXT_UNPIN_TABLE:
2035 if ( shadow_mode_refcounts(d) )
2036 break;
2038 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
2040 MEM_LOG("Mfn %lx bad domain (dom=%p)",
2041 mfn, page_get_owner(page));
2043 else if ( likely(test_and_clear_bit(_PGT_pinned,
2044 &page->u.inuse.type_info)) )
2046 put_page_and_type(page);
2047 put_page(page);
2048 /* A page is dirtied when its pin status is cleared. */
2049 mark_dirty(d, mfn);
2051 else
2053 okay = 0;
2054 put_page(page);
2055 MEM_LOG("Mfn %lx not pinned", mfn);
2057 break;
2059 case MMUEXT_NEW_BASEPTR:
2060 okay = new_guest_cr3(mfn);
2061 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
2062 break;
2064 #ifdef __x86_64__
2065 case MMUEXT_NEW_USER_BASEPTR:
2066 if ( IS_COMPAT(FOREIGNDOM) )
2068 okay = 0;
2069 break;
2071 if ( likely(mfn != 0) )
2073 if ( shadow_mode_refcounts(d) )
2074 okay = get_page_from_pagenr(mfn, d);
2075 else
2076 okay = get_page_and_type_from_pagenr(
2077 mfn, PGT_root_page_table, d);
2079 if ( unlikely(!okay) )
2081 MEM_LOG("Error while installing new mfn %lx", mfn);
2083 else
2085 unsigned long old_mfn =
2086 pagetable_get_pfn(v->arch.guest_table_user);
2087 v->arch.guest_table_user = pagetable_from_pfn(mfn);
2088 if ( old_mfn != 0 )
2090 if ( shadow_mode_refcounts(d) )
2091 put_page(mfn_to_page(old_mfn));
2092 else
2093 put_page_and_type(mfn_to_page(old_mfn));
2096 break;
2097 #endif
2099 case MMUEXT_TLB_FLUSH_LOCAL:
2100 this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
2101 break;
2103 case MMUEXT_INVLPG_LOCAL:
2104 if ( !shadow_mode_enabled(d)
2105 || shadow_invlpg(v, op.arg1.linear_addr) != 0 )
2106 local_flush_tlb_one(op.arg1.linear_addr);
2107 break;
2109 case MMUEXT_TLB_FLUSH_MULTI:
2110 case MMUEXT_INVLPG_MULTI:
2112 unsigned long vmask;
2113 cpumask_t pmask;
2114 if ( unlikely(copy_from_guest(&vmask, op.arg2.vcpumask, 1)) )
2116 okay = 0;
2117 break;
2119 pmask = vcpumask_to_pcpumask(d, vmask);
2120 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
2121 flush_tlb_mask(pmask);
2122 else
2123 flush_tlb_one_mask(pmask, op.arg1.linear_addr);
2124 break;
2127 case MMUEXT_TLB_FLUSH_ALL:
2128 flush_tlb_mask(d->domain_dirty_cpumask);
2129 break;
2131 case MMUEXT_INVLPG_ALL:
2132 flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
2133 break;
2135 case MMUEXT_FLUSH_CACHE:
2136 if ( unlikely(!cache_flush_permitted(d)) )
2138 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
2139 okay = 0;
2141 else
2143 wbinvd();
2145 break;
2147 case MMUEXT_SET_LDT:
2149 unsigned long ptr = op.arg1.linear_addr;
2150 unsigned long ents = op.arg2.nr_ents;
2152 if ( shadow_mode_external(d) )
2154 MEM_LOG("ignoring SET_LDT hypercall from external "
2155 "domain %u", d->domain_id);
2156 okay = 0;
2158 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
2159 (ents > 8192) ||
2160 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
2162 okay = 0;
2163 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
2165 else if ( (v->arch.guest_context.ldt_ents != ents) ||
2166 (v->arch.guest_context.ldt_base != ptr) )
2168 invalidate_shadow_ldt(v);
2169 v->arch.guest_context.ldt_base = ptr;
2170 v->arch.guest_context.ldt_ents = ents;
2171 load_LDT(v);
2172 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
2173 if ( ents != 0 )
2174 this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
2176 break;
2179 default:
2180 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
2181 rc = -ENOSYS;
2182 okay = 0;
2183 break;
2186 if ( unlikely(!okay) )
2188 rc = rc ? rc : -EINVAL;
2189 break;
2192 guest_handle_add_offset(uops, 1);
2195 out:
2196 process_deferred_ops();
2198 /* Add incremental work we have done to the @done output parameter. */
2199 if ( unlikely(!guest_handle_is_null(pdone)) )
2201 done += i;
2202 copy_to_guest(pdone, &done, 1);
2205 UNLOCK_BIGLOCK(d);
2206 return rc;
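/*
 * do_mmu_update() processes a batch of (ptr, val) requests. The command is
 * encoded in the low bits of ptr: MMU_NORMAL_PT_UPDATE updates the entry at
 * ptr, either as a validated page-table update or as a plain store to a
 * PGT_writable_page frame, while MMU_MACHPHYS_UPDATE sets the
 * machine-to-physical entry for the frame named by ptr to val.
 *
 * Illustrative guest-side request only (HYPERVISOR_mmu_update follows the
 * Linux PV convention; pte_addr and new_pte_val are placeholders):
 *
 *     struct mmu_update req;
 *     req.ptr = pte_addr | MMU_NORMAL_PT_UPDATE;
 *     req.val = new_pte_val;
 *     HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
 */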
2209 int do_mmu_update(
2210 XEN_GUEST_HANDLE(mmu_update_t) ureqs,
2211 unsigned int count,
2212 XEN_GUEST_HANDLE(uint) pdone,
2213 unsigned int foreigndom)
2215 struct mmu_update req;
2216 void *va;
2217 unsigned long gpfn, gmfn, mfn;
2218 struct page_info *page;
2219 int rc = 0, okay = 1, i = 0;
2220 unsigned int cmd, done = 0;
2221 struct vcpu *v = current;
2222 struct domain *d = v->domain;
2223 unsigned long type_info;
2224 struct domain_mmap_cache mapcache, sh_mapcache;
2226 LOCK_BIGLOCK(d);
2228 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
2230 count &= ~MMU_UPDATE_PREEMPTED;
2231 if ( unlikely(!guest_handle_is_null(pdone)) )
2232 (void)copy_from_guest(&done, pdone, 1);
2235 domain_mmap_cache_init(&mapcache);
2236 domain_mmap_cache_init(&sh_mapcache);
2238 if ( !set_foreigndom(foreigndom) )
2240 rc = -ESRCH;
2241 goto out;
2244 perfc_incrc(calls_to_mmu_update);
2245 perfc_addc(num_page_updates, count);
2247 if ( unlikely(!guest_handle_okay(ureqs, count)) )
2249 rc = -EFAULT;
2250 goto out;
2253 for ( i = 0; i < count; i++ )
2255 if ( hypercall_preempt_check() )
2257 rc = hypercall_create_continuation(
2258 __HYPERVISOR_mmu_update, "hihi",
2259 ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
2260 break;
2263 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
2265 MEM_LOG("Bad __copy_from_guest");
2266 rc = -EFAULT;
2267 break;
2270 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
2271 okay = 0;
2273 switch ( cmd )
2275 /*
2276 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
2277 */
2278 case MMU_NORMAL_PT_UPDATE:
2280 gmfn = req.ptr >> PAGE_SHIFT;
2281 mfn = gmfn_to_mfn(d, gmfn);
2283 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2285 MEM_LOG("Could not get page for normal update");
2286 break;
2289 va = map_domain_page_with_cache(mfn, &mapcache);
2290 va = (void *)((unsigned long)va +
2291 (unsigned long)(req.ptr & ~PAGE_MASK));
2292 page = mfn_to_page(mfn);
2294 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
2296 case PGT_l1_page_table:
2297 case PGT_l2_page_table:
2298 case PGT_l3_page_table:
2299 case PGT_l4_page_table:
2301 if ( shadow_mode_refcounts(d) )
2303 MEM_LOG("mmu update on shadow-refcounted domain!");
2304 break;
2307 if ( unlikely(!get_page_type(
2308 page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
2309 goto not_a_pt;
2311 switch ( type_info & PGT_type_mask )
2313 case PGT_l1_page_table:
2315 l1_pgentry_t l1e = l1e_from_intpte(req.val);
2316 okay = mod_l1_entry(va, l1e, mfn);
2318 break;
2319 case PGT_l2_page_table:
2321 l2_pgentry_t l2e = l2e_from_intpte(req.val);
2322 okay = mod_l2_entry(va, l2e, mfn, type_info);
2324 break;
2325 #if CONFIG_PAGING_LEVELS >= 3
2326 case PGT_l3_page_table:
2328 l3_pgentry_t l3e = l3e_from_intpte(req.val);
2329 okay = mod_l3_entry(va, l3e, mfn);
2331 break;
2332 #endif
2333 #if CONFIG_PAGING_LEVELS >= 4
2334 case PGT_l4_page_table:
2335 if ( !IS_COMPAT(FOREIGNDOM) )
2337 l4_pgentry_t l4e = l4e_from_intpte(req.val);
2338 okay = mod_l4_entry(va, l4e, mfn);
2340 break;
2341 #endif
2344 put_page_type(page);
2346 break;
2348 default:
2349 not_a_pt:
2351 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2352 break;
2354 if ( unlikely(shadow_mode_enabled(d)) )
2355 okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn));
2356 else
2358 *(intpte_t *)va = req.val;
2359 okay = 1;
2362 put_page_type(page);
2364 break;
2367 unmap_domain_page_with_cache(va, &mapcache);
2369 put_page(page);
2370 break;
2372 case MMU_MACHPHYS_UPDATE:
2374 mfn = req.ptr >> PAGE_SHIFT;
2375 gpfn = req.val;
2377 if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
2379 MEM_LOG("Could not get page for mach->phys update");
2380 break;
2383 if ( unlikely(shadow_mode_translate(FOREIGNDOM)) )
2385 MEM_LOG("Mach-phys update on shadow-translate guest");
2386 break;
2389 set_gpfn_from_mfn(mfn, gpfn);
2390 okay = 1;
2392 mark_dirty(FOREIGNDOM, mfn);
2394 put_page(mfn_to_page(mfn));
2395 break;
2397 default:
2398 MEM_LOG("Invalid page update command %x", cmd);
2399 rc = -ENOSYS;
2400 okay = 0;
2401 break;
2404 if ( unlikely(!okay) )
2406 rc = rc ? rc : -EINVAL;
2407 break;
2410 guest_handle_add_offset(ureqs, 1);
2413 out:
2414 domain_mmap_cache_destroy(&mapcache);
2415 domain_mmap_cache_destroy(&sh_mapcache);
2417 process_deferred_ops();
2419 /* Add incremental work we have done to the @done output parameter. */
2420 if ( unlikely(!guest_handle_is_null(pdone)) )
2422 done += i;
2423 copy_to_guest(pdone, &done, 1);
2426 UNLOCK_BIGLOCK(d);
2427 return rc;
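/*
 * Grant-table mapping helpers. create_grant_host_mapping() and
 * destroy_grant_host_mapping() (further below) dispatch on
 * GNTMAP_contains_pte: if it is set, @addr is the guest address of the L1
 * PTE to rewrite and the *_pte_* helpers map and modify that entry
 * directly; otherwise @addr is a linear address and the *_va_* helpers
 * locate its L1 entry via guest_map_l1e().
 */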
2431 static int create_grant_pte_mapping(
2432 uint64_t pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
2434 int rc = GNTST_okay;
2435 void *va;
2436 unsigned long gmfn, mfn;
2437 struct page_info *page;
2438 u32 type;
2439 l1_pgentry_t ol1e;
2440 struct domain *d = v->domain;
2442 ASSERT(spin_is_locked(&d->big_lock));
2444 adjust_guest_l1e(nl1e, d);
2446 gmfn = pte_addr >> PAGE_SHIFT;
2447 mfn = gmfn_to_mfn(d, gmfn);
2449 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2451 MEM_LOG("Could not get page for normal update");
2452 return GNTST_general_error;
2455 va = map_domain_page(mfn);
2456 va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
2457 page = mfn_to_page(mfn);
2459 type = page->u.inuse.type_info & PGT_type_mask;
2460 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2462 MEM_LOG("Grant map attempted to update a non-L1 page");
2463 rc = GNTST_general_error;
2464 goto failed;
2467 ol1e = *(l1_pgentry_t *)va;
2468 if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
2470 put_page_type(page);
2471 rc = GNTST_general_error;
2472 goto failed;
2475 if ( !shadow_mode_refcounts(d) )
2476 put_page_from_l1e(ol1e, d);
2478 put_page_type(page);
2480 failed:
2481 unmap_domain_page(va);
2482 put_page(page);
2484 return rc;
2487 static int destroy_grant_pte_mapping(
2488 uint64_t addr, unsigned long frame, struct domain *d)
2490 int rc = GNTST_okay;
2491 void *va;
2492 unsigned long gmfn, mfn;
2493 struct page_info *page;
2494 u32 type;
2495 l1_pgentry_t ol1e;
2497 gmfn = addr >> PAGE_SHIFT;
2498 mfn = gmfn_to_mfn(d, gmfn);
2500 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2502 MEM_LOG("Could not get page for normal update");
2503 return GNTST_general_error;
2506 va = map_domain_page(mfn);
2507 va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
2508 page = mfn_to_page(mfn);
2510 type = page->u.inuse.type_info & PGT_type_mask;
2511 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2513 MEM_LOG("Grant map attempted to update a non-L1 page");
2514 rc = GNTST_general_error;
2515 goto failed;
2518 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
2520 put_page_type(page);
2521 rc = GNTST_general_error;
2522 goto failed;
2525 /* Check that the virtual address supplied is actually mapped to frame. */
2526 if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
2528 MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
2529 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
2530 put_page_type(page);
2531 rc = GNTST_general_error;
2532 goto failed;
2535 /* Delete pagetable entry. */
2536 if ( unlikely(!UPDATE_ENTRY(l1,
2537 (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn,
2538 d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
2540 MEM_LOG("Cannot delete PTE entry at %p", va);
2541 put_page_type(page);
2542 rc = GNTST_general_error;
2543 goto failed;
2546 put_page_type(page);
2548 failed:
2549 unmap_domain_page(va);
2550 put_page(page);
2551 return rc;
2555 static int create_grant_va_mapping(
2556 unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
2558 l1_pgentry_t *pl1e, ol1e;
2559 struct domain *d = v->domain;
2560 unsigned long gl1mfn;
2561 int okay;
2563 ASSERT(spin_is_locked(&d->big_lock));
2565 adjust_guest_l1e(nl1e, d);
2567 pl1e = guest_map_l1e(v, va, &gl1mfn);
2568 if ( !pl1e )
2570 MEM_LOG("Could not find L1 PTE for address %lx", va);
2571 return GNTST_general_error;
2573 ol1e = *pl1e;
2574 okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
2575 guest_unmap_l1e(v, pl1e);
2576 pl1e = NULL;
2578 if ( !okay )
2579 return GNTST_general_error;
2581 if ( !shadow_mode_refcounts(d) )
2582 put_page_from_l1e(ol1e, d);
2584 return GNTST_okay;
2587 static int destroy_grant_va_mapping(
2588 unsigned long addr, unsigned long frame, struct vcpu *v)
2590 l1_pgentry_t *pl1e, ol1e;
2591 unsigned long gl1mfn;
2592 int rc = 0;
2594 pl1e = guest_map_l1e(v, addr, &gl1mfn);
2595 if ( !pl1e )
2597 MEM_LOG("Could not find L1 PTE for address %lx", addr);
2598 return GNTST_general_error;
2600 ol1e = *pl1e;
2602 /* Check that the virtual address supplied is actually mapped to frame. */
2603 if ( unlikely(l1e_get_pfn(ol1e) != frame) )
2605 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2606 l1e_get_pfn(ol1e), addr, frame);
2607 rc = GNTST_general_error;
2608 goto out;
2611 /* Delete pagetable entry. */
2612 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
2614 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
2615 rc = GNTST_general_error;
2616 goto out;
2619 out:
2620 guest_unmap_l1e(v, pl1e);
2621 return rc;
2624 int create_grant_host_mapping(
2625 uint64_t addr, unsigned long frame, unsigned int flags)
2627 l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
2629 if ( (flags & GNTMAP_application_map) )
2630 l1e_add_flags(pte,_PAGE_USER);
2631 if ( !(flags & GNTMAP_readonly) )
2632 l1e_add_flags(pte,_PAGE_RW);
2634 if ( flags & GNTMAP_contains_pte )
2635 return create_grant_pte_mapping(addr, pte, current);
2636 return create_grant_va_mapping(addr, pte, current);
2639 int destroy_grant_host_mapping(
2640 uint64_t addr, unsigned long frame, unsigned int flags)
2642 if ( flags & GNTMAP_contains_pte )
2643 return destroy_grant_pte_mapping(addr, frame, current->domain);
2644 return destroy_grant_va_mapping(addr, frame, current);
2647 int steal_page(
2648 struct domain *d, struct page_info *page, unsigned int memflags)
2650 u32 _d, _nd, x, y;
2652 spin_lock(&d->page_alloc_lock);
2654 /*
2655 * The tricky bit: atomically release ownership while there is just one
2656 * benign reference to the page (PGC_allocated). If that reference
2657 * disappears then the deallocation routine will safely spin.
2658 */
2659 _d = pickle_domptr(d);
2660 _nd = page->u.inuse._domain;
2661 y = page->count_info;
2662 do {
2663 x = y;
2664 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
2665 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
2666 MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
2667 " caf=%08x, taf=%" PRtype_info "\n",
2668 (void *) page_to_mfn(page),
2669 d, d->domain_id, unpickle_domptr(_nd), x,
2670 page->u.inuse.type_info);
2671 spin_unlock(&d->page_alloc_lock);
2672 return -1;
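/*
 * The cmpxchg8b below compares EDX:EAX against the 64-bit
 * { count_info, _domain } pair and, if it still matches the snapshot taken
 * above, stores ECX:EBX. The replacement keeps count_info unchanged
 * (EBX = x) but clears the owner (ECX = NULL), so ownership is released
 * atomically with the reference-count check.
 */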
2674 __asm__ __volatile__(
2675 LOCK_PREFIX "cmpxchg8b %2"
2676 : "=d" (_nd), "=a" (y),
2677 "=m" (*(volatile u64 *)(&page->count_info))
2678 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
2679 } while (unlikely(_nd != _d) || unlikely(y != x));
2681 /*
2682 * Unlink from 'd'. At least one reference remains (now anonymous), so
2683 * no one else is spinning to try to delete this page from 'd'.
2684 */
2685 if ( !(memflags & MEMF_no_refcount) )
2686 d->tot_pages--;
2687 list_del(&page->list);
2689 spin_unlock(&d->page_alloc_lock);
2691 return 0;
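/*
 * do_update_va_mapping() rewrites the single L1 entry mapping @va in the
 * caller's current address space and then performs the flush selected by
 * @flags: UVMF_TLB_FLUSH or UVMF_INVLPG, scoped to UVMF_LOCAL, UVMF_ALL,
 * or a caller-supplied VCPU bitmap whose pointer is carried in the
 * remaining flag bits.
 *
 * Illustrative guest-side call only (the wrapper name follows the Linux PV
 * convention; new_pte_val is a placeholder):
 *
 *     HYPERVISOR_update_va_mapping(va, new_pte_val, UVMF_INVLPG | UVMF_LOCAL);
 */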
2694 int do_update_va_mapping(unsigned long va, u64 val64,
2695 unsigned long flags)
2697 l1_pgentry_t val = l1e_from_intpte(val64);
2698 struct vcpu *v = current;
2699 struct domain *d = v->domain;
2700 l1_pgentry_t *pl1e;
2701 unsigned long vmask, bmap_ptr, gl1mfn;
2702 cpumask_t pmask;
2703 int rc = 0;
2705 perfc_incrc(calls_to_update_va);
2707 if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
2708 return -EINVAL;
2710 LOCK_BIGLOCK(d);
2712 pl1e = guest_map_l1e(v, va, &gl1mfn);
2714 if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
2715 rc = -EINVAL;
2717 if ( pl1e )
2718 guest_unmap_l1e(v, pl1e);
2719 pl1e = NULL;
2721 switch ( flags & UVMF_FLUSHTYPE_MASK )
2723 case UVMF_TLB_FLUSH:
2724 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2726 case UVMF_LOCAL:
2727 local_flush_tlb();
2728 break;
2729 case UVMF_ALL:
2730 flush_tlb_mask(d->domain_dirty_cpumask);
2731 break;
2732 default:
2733 if ( unlikely(!IS_COMPAT(d) ?
2734 get_user(vmask, (unsigned long *)bmap_ptr) :
2735 get_user(vmask, (unsigned int *)bmap_ptr)) )
2736 rc = -EFAULT;
2737 pmask = vcpumask_to_pcpumask(d, vmask);
2738 flush_tlb_mask(pmask);
2739 break;
2741 break;
2743 case UVMF_INVLPG:
2744 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2746 case UVMF_LOCAL:
2747 if ( !shadow_mode_enabled(d)
2748 || (shadow_invlpg(current, va) != 0) )
2749 local_flush_tlb_one(va);
2750 break;
2751 case UVMF_ALL:
2752 flush_tlb_one_mask(d->domain_dirty_cpumask, va);
2753 break;
2754 default:
2755 if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
2756 rc = -EFAULT;
2757 pmask = vcpumask_to_pcpumask(d, vmask);
2758 flush_tlb_one_mask(pmask, va);
2759 break;
2761 break;
2764 process_deferred_ops();
2766 UNLOCK_BIGLOCK(d);
2768 return rc;
2771 int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
2772 unsigned long flags,
2773 domid_t domid)
2775 int rc;
2777 if ( unlikely(!IS_PRIV(current->domain)) )
2778 return -EPERM;
2780 if ( !set_foreigndom(domid) )
2781 return -ESRCH;
2783 rc = do_update_va_mapping(va, val64, flags);
2785 return rc;
2790 /*************************
2791 * Descriptor Tables
2792 */
2794 void destroy_gdt(struct vcpu *v)
2796 int i;
2797 unsigned long pfn;
2799 v->arch.guest_context.gdt_ents = 0;
2800 for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
2802 if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
2803 put_page_and_type(mfn_to_page(pfn));
2804 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
2805 v->arch.guest_context.gdt_frames[i] = 0;
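/*
 * set_gdt() installs a new guest GDT: every supplied frame is validated
 * and takes a PGT_gdt_page type reference, the old GDT is torn down, and
 * the new frames are wired into the per-domain PTEs. With 512 descriptors
 * per page, no more than FIRST_RESERVED_GDT_ENTRY entries may be
 * installed; the descriptors above that boundary are reserved for Xen.
 */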
2810 long set_gdt(struct vcpu *v,
2811 unsigned long *frames,
2812 unsigned int entries)
2814 struct domain *d = v->domain;
2815 /* NB. There are 512 8-byte entries per GDT page. */
2816 int i, nr_pages = (entries + 511) / 512;
2817 unsigned long mfn;
2819 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2820 return -EINVAL;
2822 /* Check the pages in the new GDT. */
2823 for ( i = 0; i < nr_pages; i++ ) {
2824 mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
2825 if ( !mfn_valid(mfn) ||
2826 !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
2827 goto fail;
2830 /* Tear down the old GDT. */
2831 destroy_gdt(v);
2833 /* Install the new GDT. */
2834 v->arch.guest_context.gdt_ents = entries;
2835 for ( i = 0; i < nr_pages; i++ )
2837 v->arch.guest_context.gdt_frames[i] = frames[i];
2838 l1e_write(&v->arch.perdomain_ptes[i],
2839 l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
2842 return 0;
2844 fail:
2845 while ( i-- > 0 )
2846 put_page_and_type(mfn_to_page(frames[i]));
2847 return -EINVAL;
2851 long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
2853 int nr_pages = (entries + 511) / 512;
2854 unsigned long frames[16];
2855 long ret;
2857 /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */
2858 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2859 return -EINVAL;
2861 if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
2862 return -EFAULT;
2864 LOCK_BIGLOCK(current->domain);
2866 if ( (ret = set_gdt(current, frames, entries)) == 0 )
2867 local_flush_tlb();
2869 UNLOCK_BIGLOCK(current->domain);
2871 return ret;
2875 long do_update_descriptor(u64 pa, u64 desc)
2877 struct domain *dom = current->domain;
2878 unsigned long gmfn = pa >> PAGE_SHIFT;
2879 unsigned long mfn;
2880 unsigned int offset;
2881 struct desc_struct *gdt_pent, d;
2882 struct page_info *page;
2883 long ret = -EINVAL;
2885 offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
2887 *(u64 *)&d = desc;
2889 LOCK_BIGLOCK(dom);
2891 mfn = gmfn_to_mfn(dom, gmfn);
2892 if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
2893 !mfn_valid(mfn) ||
2894 !check_descriptor(dom, &d) )
2896 UNLOCK_BIGLOCK(dom);
2897 return -EINVAL;
2900 page = mfn_to_page(mfn);
2901 if ( unlikely(!get_page(page, dom)) )
2903 UNLOCK_BIGLOCK(dom);
2904 return -EINVAL;
2907 /* Check if the given frame is in use in an unsafe context. */
2908 switch ( page->u.inuse.type_info & PGT_type_mask )
2910 case PGT_gdt_page:
2911 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
2912 goto out;
2913 break;
2914 case PGT_ldt_page:
2915 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
2916 goto out;
2917 break;
2918 default:
2919 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2920 goto out;
2921 break;
2924 mark_dirty(dom, mfn);
2926 /* All is good so make the update. */
2927 gdt_pent = map_domain_page(mfn);
2928 memcpy(&gdt_pent[offset], &d, 8);
2929 unmap_domain_page(gdt_pent);
2931 put_page_type(page);
2933 ret = 0; /* success */
2935 out:
2936 put_page(page);
2938 UNLOCK_BIGLOCK(dom);
2940 return ret;
2943 typedef struct e820entry e820entry_t;
2944 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
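/*
 * arch_memory_op() implements the x86-specific XENMEM_* subcommands:
 * XENMEM_add_to_physmap inserts the shared-info frame or a grant-table
 * frame into a translated guest's physmap; XENMEM_set_memory_map and
 * XENMEM_memory_map store and return a per-domain pseudo-physical e820
 * map; XENMEM_machine_memory_map returns the host e820 to privileged
 * callers; XENMEM_machphys_mapping describes the machine-to-physical
 * table mapping. Anything else is passed to subarch_memory_op().
 *
 * Illustrative guest-side call only (HYPERVISOR_memory_op follows the
 * Linux PV convention; chosen_gpfn is a placeholder):
 *
 *     struct xen_add_to_physmap xatp;
 *     xatp.domid = DOMID_SELF;
 *     xatp.space = XENMAPSPACE_shared_info;
 *     xatp.idx   = 0;
 *     xatp.gpfn  = chosen_gpfn;
 *     HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
 */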
2946 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
2948 switch ( op )
2950 case XENMEM_add_to_physmap:
2952 struct xen_add_to_physmap xatp;
2953 unsigned long prev_mfn, mfn = 0, gpfn;
2954 struct domain *d;
2956 if ( copy_from_guest(&xatp, arg, 1) )
2957 return -EFAULT;
2959 if ( xatp.domid == DOMID_SELF )
2961 d = current->domain;
2962 get_knownalive_domain(d);
2964 else if ( !IS_PRIV(current->domain) )
2965 return -EPERM;
2966 else if ( (d = find_domain_by_id(xatp.domid)) == NULL )
2967 return -ESRCH;
2969 switch ( xatp.space )
2971 case XENMAPSPACE_shared_info:
2972 if ( xatp.idx == 0 )
2973 mfn = virt_to_mfn(d->shared_info);
2974 break;
2975 case XENMAPSPACE_grant_table:
2976 if ( xatp.idx < NR_GRANT_FRAMES )
2977 mfn = virt_to_mfn(d->grant_table->shared) + xatp.idx;
2978 break;
2979 default:
2980 break;
2983 if ( !shadow_mode_translate(d) || (mfn == 0) )
2985 put_domain(d);
2986 return -EINVAL;
2989 LOCK_BIGLOCK(d);
2991 /* Remove previously mapped page if it was present. */
2992 prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
2993 if ( mfn_valid(prev_mfn) )
2995 if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
2996 /* Xen heap frames are simply unhooked from this phys slot. */
2997 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
2998 else
2999 /* Normal domain memory is freed, to avoid leaking memory. */
3000 guest_remove_page(d, xatp.gpfn);
3003 /* Unmap from old location, if any. */
3004 gpfn = get_gpfn_from_mfn(mfn);
3005 if ( gpfn != INVALID_M2P_ENTRY )
3006 guest_physmap_remove_page(d, gpfn, mfn);
3008 /* Map at new location. */
3009 guest_physmap_add_page(d, xatp.gpfn, mfn);
3011 UNLOCK_BIGLOCK(d);
3013 put_domain(d);
3015 break;
3018 case XENMEM_set_memory_map:
3020 struct xen_foreign_memory_map fmap;
3021 struct domain *d;
3022 int rc;
3024 if ( copy_from_guest(&fmap, arg, 1) )
3025 return -EFAULT;
3027 if ( fmap.map.nr_entries > ARRAY_SIZE(d->arch.e820) )
3028 return -EINVAL;
3030 if ( fmap.domid == DOMID_SELF )
3032 d = current->domain;
3033 get_knownalive_domain(d);
3035 else if ( !IS_PRIV(current->domain) )
3036 return -EPERM;
3037 else if ( (d = find_domain_by_id(fmap.domid)) == NULL )
3038 return -ESRCH;
3040 rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
3041 fmap.map.nr_entries) ? -EFAULT : 0;
3042 d->arch.nr_e820 = fmap.map.nr_entries;
3044 put_domain(d);
3045 return rc;
3048 case XENMEM_memory_map:
3050 struct xen_memory_map map;
3051 struct domain *d = current->domain;
3053 /* Backwards compatibility. */
3054 if ( d->arch.nr_e820 == 0 )
3055 return -ENOSYS;
3057 if ( copy_from_guest(&map, arg, 1) )
3058 return -EFAULT;
3060 map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
3061 if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
3062 copy_to_guest(arg, &map, 1) )
3063 return -EFAULT;
3065 return 0;
3068 case XENMEM_machine_memory_map:
3070 struct xen_memory_map memmap;
3071 XEN_GUEST_HANDLE(e820entry_t) buffer;
3072 int count;
3074 if ( !IS_PRIV(current->domain) )
3075 return -EINVAL;
3077 if ( copy_from_guest(&memmap, arg, 1) )
3078 return -EFAULT;
3079 if ( memmap.nr_entries < e820.nr_map + 1 )
3080 return -EINVAL;
3082 buffer = guest_handle_cast(memmap.buffer, e820entry_t);
3084 count = min((unsigned int)e820.nr_map, memmap.nr_entries);
3085 if ( copy_to_guest(buffer, &e820.map[0], count) != 0 )
3086 return -EFAULT;
3088 memmap.nr_entries = count;
3090 if ( copy_to_guest(arg, &memmap, 1) )
3091 return -EFAULT;
3093 return 0;
3096 case XENMEM_machphys_mapping:
3098 struct xen_machphys_mapping mapping = {
3099 .v_start = MACH2PHYS_VIRT_START,
3100 .v_end = MACH2PHYS_VIRT_END,
3101 .max_mfn = MACH2PHYS_NR_ENTRIES - 1
3102 };
3104 if ( copy_to_guest(arg, &mapping, 1) )
3105 return -EFAULT;
3107 return 0;
3110 default:
3111 return subarch_memory_op(op, arg);
3114 return 0;
3118 /*************************
3119 * Writable Pagetables
3120 */
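/*
 * When a PV guest writes directly to a page currently typed as an L1 page
 * table (and therefore mapped read-only), the resulting page fault is
 * handled by ptwr_do_page_fault() below: the faulting instruction is fed
 * to x86_emulate() with the ptwr_emulate_ops hooks, and the emulated store
 * is applied by ptwr_emulated_update(), which validates the new PTE via
 * get_page_from_l1e() before committing it.
 */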
3122 struct ptwr_emulate_ctxt {
3123 struct x86_emulate_ctxt ctxt;
3124 unsigned long cr2;
3125 l1_pgentry_t pte;
3126 };
3128 static int ptwr_emulated_read(
3129 enum x86_segment seg,
3130 unsigned long offset,
3131 unsigned long *val,
3132 unsigned int bytes,
3133 struct x86_emulate_ctxt *ctxt)
3135 unsigned int rc;
3136 unsigned long addr = offset;
3138 *val = 0;
3139 if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
3141 propagate_page_fault(addr + bytes - rc, 0); /* read fault */
3142 return X86EMUL_PROPAGATE_FAULT;
3145 return X86EMUL_CONTINUE;
3148 static int ptwr_emulated_update(
3149 unsigned long addr,
3150 paddr_t old,
3151 paddr_t val,
3152 unsigned int bytes,
3153 unsigned int do_cmpxchg,
3154 struct ptwr_emulate_ctxt *ptwr_ctxt)
3156 unsigned long mfn;
3157 struct page_info *page;
3158 l1_pgentry_t pte, ol1e, nl1e, *pl1e;
3159 struct vcpu *v = current;
3160 struct domain *d = v->domain;
3162 /* Only allow naturally-aligned stores within the original %cr2 page. */
3163 if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) )
3165 MEM_LOG("Bad ptwr access (cr2=%lx, addr=%lx, bytes=%u)",
3166 ptwr_ctxt->cr2, addr, bytes);
3167 return X86EMUL_UNHANDLEABLE;
3170 /* Turn a sub-word access into a full-word access. */
3171 if ( bytes != sizeof(paddr_t) )
3173 paddr_t full;
3174 unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
3176 /* Align address; read full word. */
3177 addr &= ~(sizeof(paddr_t)-1);
3178 if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
3180 propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
3181 return X86EMUL_PROPAGATE_FAULT;
3183 /* Mask out bits provided by caller. */
3184 full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
3185 /* Shift the caller value and OR in the missing bits. */
3186 val &= (((paddr_t)1 << (bytes*8)) - 1);
3187 val <<= (offset)*8;
3188 val |= full;
3189 /* Also fill in missing parts of the cmpxchg old value. */
3190 old &= (((paddr_t)1 << (bytes*8)) - 1);
3191 old <<= (offset)*8;
3192 old |= full;
3195 pte = ptwr_ctxt->pte;
3196 mfn = l1e_get_pfn(pte);
3197 page = mfn_to_page(mfn);
3199 /* We are looking only for read-only mappings of p.t. pages. */
3200 ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
3201 ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
3202 ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
3203 ASSERT(page_get_owner(page) == d);
3205 /* Check the new PTE. */
3206 nl1e = l1e_from_intpte(val);
3207 if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
3209 if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) &&
3210 (bytes == 4) && (addr & 4) && !do_cmpxchg &&
3211 (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
3213 /*
3214 * If this is an upper-half write to a PAE PTE then we assume that
3215 * the guest has simply got the two writes the wrong way round. We
3216 * zap the PRESENT bit on the assumption that the bottom half will
3217 * be written immediately after we return to the guest.
3218 */
3219 MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte,
3220 l1e_get_intpte(nl1e));
3221 l1e_remove_flags(nl1e, _PAGE_PRESENT);
3223 else
3225 MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
3226 return X86EMUL_UNHANDLEABLE;
3230 adjust_guest_l1e(nl1e, d);
3232 /* Checked successfully: do the update (write or cmpxchg). */
3233 pl1e = map_domain_page(page_to_mfn(page));
3234 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
3235 if ( do_cmpxchg )
3237 int okay;
3238 ol1e = l1e_from_intpte(old);
3240 if ( shadow_mode_enabled(d) )
3242 intpte_t t = old;
3243 okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e,
3244 &t, val, _mfn(mfn));
3245 okay = (okay && t == old);
3247 else
3248 okay = (cmpxchg((intpte_t *)pl1e, old, val) == old);
3250 if ( !okay )
3252 unmap_domain_page(pl1e);
3253 put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
3254 return X86EMUL_CMPXCHG_FAILED;
3257 else
3259 ol1e = *pl1e;
3260 if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
3261 BUG();
3264 unmap_domain_page(pl1e);
3266 /* Finally, drop the old PTE. */
3267 put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
3269 return X86EMUL_CONTINUE;
3272 static int ptwr_emulated_write(
3273 enum x86_segment seg,
3274 unsigned long offset,
3275 unsigned long val,
3276 unsigned int bytes,
3277 struct x86_emulate_ctxt *ctxt)
3279 return ptwr_emulated_update(
3280 offset, 0, val, bytes, 0,
3281 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3284 static int ptwr_emulated_cmpxchg(
3285 enum x86_segment seg,
3286 unsigned long offset,
3287 unsigned long old,
3288 unsigned long new,
3289 unsigned int bytes,
3290 struct x86_emulate_ctxt *ctxt)
3292 return ptwr_emulated_update(
3293 offset, old, new, bytes, 1,
3294 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3297 static int ptwr_emulated_cmpxchg8b(
3298 enum x86_segment seg,
3299 unsigned long offset,
3300 unsigned long old,
3301 unsigned long old_hi,
3302 unsigned long new,
3303 unsigned long new_hi,
3304 struct x86_emulate_ctxt *ctxt)
3306 if ( CONFIG_PAGING_LEVELS == 2 )
3307 return X86EMUL_UNHANDLEABLE;
3308 return ptwr_emulated_update(
3309 offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
3310 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3313 static struct x86_emulate_ops ptwr_emulate_ops = {
3314 .read = ptwr_emulated_read,
3315 .insn_fetch = ptwr_emulated_read,
3316 .write = ptwr_emulated_write,
3317 .cmpxchg = ptwr_emulated_cmpxchg,
3318 .cmpxchg8b = ptwr_emulated_cmpxchg8b
3319 };
3321 /* Write page fault handler: check if guest is trying to modify a PTE. */
3322 int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
3323 struct cpu_user_regs *regs)
3325 struct domain *d = v->domain;
3326 struct page_info *page;
3327 l1_pgentry_t pte;
3328 struct ptwr_emulate_ctxt ptwr_ctxt;
3330 LOCK_BIGLOCK(d);
3332 /*
3333 * Attempt to read the PTE that maps the VA being accessed. By checking for
3334 * PDE validity in the L2 we avoid many expensive fixups in __get_user().
3335 */
3336 guest_get_eff_l1e(v, addr, &pte);
3337 if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
3338 goto bail;
3339 page = l1e_get_page(pte);
3341 /* We are looking only for read-only mappings of p.t. pages. */
3342 if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
3343 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3344 ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
3345 (page_get_owner(page) != d) )
3346 goto bail;
3348 ptwr_ctxt.ctxt.regs = regs;
3349 ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
3350 IS_COMPAT(d) ? 32 : BITS_PER_LONG;
3351 ptwr_ctxt.cr2 = addr;
3352 ptwr_ctxt.pte = pte;
3353 if ( x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) )
3354 goto bail;
3356 UNLOCK_BIGLOCK(d);
3357 perfc_incrc(ptwr_emulations);
3358 return EXCRET_fault_fixed;
3360 bail:
3361 UNLOCK_BIGLOCK(d);
3362 return 0;
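/*
 * map_pages_to_xen() creates mappings in Xen's own address space. When
 * virt, mfn and the remaining count are all superpage-aligned and
 * MAP_SMALL_PAGES is not requested, a single PSE L2 entry is written;
 * otherwise an L1 table is allocated (or an existing superpage shattered
 * into 4kB entries) and the individual L1 entries are written.
 */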
3365 int map_pages_to_xen(
3366 unsigned long virt,
3367 unsigned long mfn,
3368 unsigned long nr_mfns,
3369 unsigned long flags)
3371 l2_pgentry_t *pl2e, ol2e;
3372 l1_pgentry_t *pl1e, ol1e;
3373 unsigned int i;
3375 unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
3376 flags &= ~MAP_SMALL_PAGES;
3378 while ( nr_mfns != 0 )
3380 pl2e = virt_to_xen_l2e(virt);
3382 if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
3383 (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
3384 !map_small_pages )
3386 /* Super-page mapping. */
3387 ol2e = *pl2e;
3388 l2e_write(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
3390 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
3392 local_flush_tlb_pge();
3393 if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
3394 free_xen_pagetable(l2e_get_page(ol2e));
3397 virt += 1UL << L2_PAGETABLE_SHIFT;
3398 mfn += 1UL << PAGETABLE_ORDER;
3399 nr_mfns -= 1UL << PAGETABLE_ORDER;
3401 else
3403 /* Normal page mapping. */
3404 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
3406 pl1e = page_to_virt(alloc_xen_pagetable());
3407 clear_page(pl1e);
3408 l2e_write(pl2e, l2e_from_page(virt_to_page(pl1e),
3409 __PAGE_HYPERVISOR));
3411 else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
3413 pl1e = page_to_virt(alloc_xen_pagetable());
3414 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3415 l1e_write(&pl1e[i],
3416 l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
3417 l2e_get_flags(*pl2e) & ~_PAGE_PSE));
3418 l2e_write(pl2e, l2e_from_page(virt_to_page(pl1e),
3419 __PAGE_HYPERVISOR));
3420 local_flush_tlb_pge();
3423 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
3424 ol1e = *pl1e;
3425 l1e_write(pl1e, l1e_from_pfn(mfn, flags));
3426 if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
3427 local_flush_tlb_one(virt);
3429 virt += 1UL << L1_PAGETABLE_SHIFT;
3430 mfn += 1UL;
3431 nr_mfns -= 1UL;
3435 return 0;
3438 void __set_fixmap(
3439 enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
3441 BUG_ON(idx >= __end_of_fixed_addresses);
3442 map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
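/*
 * The memguard routines remap Xen-heap ranges with 4kB mappings and, when
 * guarding, clear _PAGE_PRESENT so that stray accesses fault immediately.
 * memguard_guard_stack() uses this to place a guard page just below the
 * debug-stack area at the top of each CPU stack.
 */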
3445 #ifdef MEMORY_GUARD
3447 void memguard_init(void)
3449 map_pages_to_xen(
3450 PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
3451 __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
3454 static void __memguard_change_range(void *p, unsigned long l, int guard)
3456 unsigned long _p = (unsigned long)p;
3457 unsigned long _l = (unsigned long)l;
3458 unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
3460 /* Ensure we are dealing with a page-aligned whole number of pages. */
3461 ASSERT((_p&PAGE_MASK) != 0);
3462 ASSERT((_l&PAGE_MASK) != 0);
3463 ASSERT((_p&~PAGE_MASK) == 0);
3464 ASSERT((_l&~PAGE_MASK) == 0);
3466 if ( guard )
3467 flags &= ~_PAGE_PRESENT;
3469 map_pages_to_xen(
3470 _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
3473 void memguard_guard_range(void *p, unsigned long l)
3475 __memguard_change_range(p, l, 1);
3478 void memguard_unguard_range(void *p, unsigned long l)
3480 __memguard_change_range(p, l, 0);
3483 #endif
3485 void memguard_guard_stack(void *p)
3487 BUILD_BUG_ON((DEBUG_STACK_SIZE + PAGE_SIZE) > STACK_SIZE);
3488 p = (void *)((unsigned long)p + STACK_SIZE - DEBUG_STACK_SIZE - PAGE_SIZE);
3489 memguard_guard_range(p, PAGE_SIZE);
3492 /*
3493 * Local variables:
3494 * mode: C
3495 * c-set-style: "BSD"
3496 * c-basic-offset: 4
3497 * tab-width: 4
3498 * indent-tabs-mode: nil
3499 * End:
3500 */