ia64/xen-unstable

view xen/arch/x86/mm.c @ 14196:9d36026b1b43

xen: Cleanups and bug fixes after the rcu_lock_domain patch.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Mar 01 11:38:55 2007 +0000 (2007-03-01)
parents 09a9b6d6c356
children 0de2f7d8d89f
1 /******************************************************************************
2 * arch/x86/mm.c
3 *
4 * Copyright (c) 2002-2005 K A Fraser
5 * Copyright (c) 2004 Christian Limpach
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * A description of the x86 page table API:
24 *
25 * Domains trap to do_mmu_update with a list of update requests.
26 * This is a list of (ptr, val) pairs, where the requested operation
27 * is *ptr = val.
28 *
29 * Reference counting of pages:
30 * ----------------------------
31 * Each page has two refcounts: tot_count and type_count.
32 *
33 * TOT_COUNT is the obvious reference count. It counts all uses of a
34 * physical page frame by a domain, including uses as a page directory,
35 * a page table, or simple mappings via a PTE. This count prevents a
36 * domain from releasing a frame back to the free pool when it still holds
37 * a reference to it.
38 *
39 * TYPE_COUNT is more subtle. A frame can be put to one of three
40 * mutually-exclusive uses: it might be used as a page directory, or a
41 * page table, or it may be mapped writable by the domain [of course, a
42 * frame may not be used in any of these three ways!].
43 * So, type_count is a count of the number of times a frame is being
44 * referred to in its current incarnation. Therefore, a page can only
45 * change its type when its type count is zero.
46 *
47 * Pinning the page type:
48 * ----------------------
49 * The type of a page can be pinned/unpinned with the commands
50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
51 * pinning is not reference counted, so it can't be nested).
52 * This is useful to prevent a page's type count falling to zero, at which
53 * point safety checks would need to be carried out next time the count
54 * is increased again.
55 *
56 * A further note on writable page mappings:
57 * -----------------------------------------
58 * For simplicity, the count of writable mappings for a page may not
59 * correspond to reality. The 'writable count' is incremented for every
60 * PTE which maps the page with the _PAGE_RW flag set. However, for
61 * write access to be possible the page directory entry must also have
62 * its _PAGE_RW bit set. We do not check this as it complicates the
63 * reference counting considerably [consider the case of multiple
64 * directory entries referencing a single page table, some with the RW
65 * bit set, others not -- it starts getting a bit messy].
66 * In normal use, this simplification shouldn't be a problem.
67 * However, the logic can be added if required.
68 *
69 * One more note on read-only page mappings:
70 * -----------------------------------------
71 * We want domains to be able to map pages for read-only access. The
72 * main reason is that page tables and directories should be readable
73 * by a domain, but it would not be safe for them to be writable.
74 * However, domains have free access to rings 1 & 2 of the Intel
75 * privilege model. In terms of page protection, these are considered
76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
77 * read-only restrictions are respected in supervisor mode -- if the
78 * bit is clear then any mapped page is writable.
79 *
80 * We get round this by always setting the WP bit and disallowing
81 * updates to it. This is very unlikely to cause a problem for guest
82 * OS's, which will generally use the WP bit to simplify copy-on-write
83 * implementation (in that case, OS wants a fault when it writes to
84 * an application-supplied buffer).
85 */
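
The two counts described above can be modelled in a few lines of ordinary C. The sketch below is illustrative only -- the struct, field and function names are invented for the example and are not Xen's real page_info interface -- but it captures the rule that a frame may only change its type while its type count is zero:

    #include <assert.h>
    #include <stdio.h>

    /* Invented model, not Xen's struct page_info. */
    enum frame_type { TYPE_NONE, TYPE_PAGE_TABLE, TYPE_PAGE_DIR, TYPE_WRITABLE };

    struct frame {
        unsigned tot_count;   /* all uses of the frame                   */
        unsigned type_count;  /* uses in the frame's current incarnation */
        enum frame_type type;
    };

    /* Pairs a general reference with a type reference, in the spirit of
     * get_page_and_type(): re-typing is only allowed at type_count == 0. */
    static int get_type(struct frame *f, enum frame_type t)
    {
        if ( f->type_count == 0 )
            f->type = t;              /* no users of the old type remain  */
        else if ( f->type != t )
            return 0;                 /* busy with a conflicting type     */
        f->type_count++;
        f->tot_count++;
        return 1;
    }

    static void put_type(struct frame *f)
    {
        assert(f->type_count > 0 && f->tot_count > 0);
        f->type_count--;
        f->tot_count--;
    }

    int main(void)
    {
        struct frame f = { 0, 0, TYPE_NONE };
        assert(get_type(&f, TYPE_PAGE_TABLE));   /* ok: count was zero    */
        assert(!get_type(&f, TYPE_WRITABLE));    /* refused: wrong type   */
        put_type(&f);
        assert(get_type(&f, TYPE_WRITABLE));     /* ok again: count hit 0 */
        printf("type=%d type_count=%u tot_count=%u\n",
               f.type, f.type_count, f.tot_count);
        return 0;
    }
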
87 #include <xen/config.h>
88 #include <xen/init.h>
89 #include <xen/kernel.h>
90 #include <xen/lib.h>
91 #include <xen/mm.h>
92 #include <xen/domain.h>
93 #include <xen/sched.h>
94 #include <xen/errno.h>
95 #include <xen/perfc.h>
96 #include <xen/irq.h>
97 #include <xen/softirq.h>
98 #include <xen/domain_page.h>
99 #include <xen/event.h>
100 #include <xen/iocap.h>
101 #include <xen/guest_access.h>
102 #include <asm/paging.h>
103 #include <asm/shadow.h>
104 #include <asm/page.h>
105 #include <asm/flushtlb.h>
106 #include <asm/io.h>
107 #include <asm/ldt.h>
108 #include <asm/x86_emulate.h>
109 #include <asm/e820.h>
110 #include <asm/hypercall.h>
111 #include <public/memory.h>
113 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
115 /*
116 * PTE updates can be done with ordinary writes except:
117 * 1. Debug builds get extra checking by using CMPXCHG[8B].
118 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
119 */
120 #if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
121 #define PTE_UPDATE_WITH_CMPXCHG
122 #endif
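
On PAE a present PTE is 8 bytes wide, so an update must land as a single atomic store; later in this file update_intpte() does that through paging_cmpxchg_guest_entry(). As a rough standalone illustration of the compare-and-swap idea, using the GCC/Clang __sync builtin in place of a raw CMPXCHG8B:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t intpte_t;   /* an 8-byte PTE, as on PAE/x86-64 */

    /* Compare-and-swap update: succeeds only if nobody changed the entry
     * under us, which is the property CMPXCHG8B provides on PAE. */
    static int pte_update(volatile intpte_t *p, intpte_t old, intpte_t new)
    {
        return __sync_bool_compare_and_swap(p, old, new);
    }

    int main(void)
    {
        intpte_t pte = 0x1000 | 0x1;      /* frame 1, _PAGE_PRESENT    */
        intpte_t new = 0x2000 | 0x3;      /* frame 2, present + RW     */
        printf("update %s\n", pte_update(&pte, 0x1000 | 0x1, new) ? "ok" : "raced");
        printf("pte now %#llx\n", (unsigned long long)pte);
        return 0;
    }
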
124 /* Used to defer flushing of memory structures. */
125 struct percpu_mm_info {
126 #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */
127 #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
128 #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */
129 unsigned int deferred_ops;
130 /* If non-NULL, specifies a foreign subject domain for some operations. */
131 struct domain *foreign;
132 };
133 static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
135 /*
136 * Returns the current foreign domain; defaults to the currently-executing
137 * domain if a foreign override hasn't been specified.
138 */
139 #define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
141 /* Private domain structs for DOMID_XEN and DOMID_IO. */
142 static struct domain *dom_xen, *dom_io;
144 /* Frame table and its size in pages. */
145 struct page_info *frame_table;
146 unsigned long max_page;
147 unsigned long total_pages;
149 #ifdef CONFIG_COMPAT
150 l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
151 #define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
152 L3_DISALLOW_MASK : \
153 COMPAT_L3_DISALLOW_MASK)
154 #else
155 #define l3_disallow_mask(d) L3_DISALLOW_MASK
156 #endif
158 static void queue_deferred_ops(struct domain *d, unsigned int ops)
159 {
160 ASSERT(d == current->domain);
161 this_cpu(percpu_mm_info).deferred_ops |= ops;
162 }
164 void __init init_frametable(void)
165 {
166 unsigned long nr_pages, page_step, i, mfn;
168 frame_table = (struct page_info *)FRAMETABLE_VIRT_START;
170 nr_pages = PFN_UP(max_page * sizeof(*frame_table));
171 page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
173 for ( i = 0; i < nr_pages; i += page_step )
174 {
175 mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
176 if ( mfn == 0 )
177 panic("Not enough memory for frame table\n");
178 map_pages_to_xen(
179 FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
180 mfn, page_step, PAGE_HYPERVISOR);
181 }
183 memset(frame_table, 0, nr_pages << PAGE_SHIFT);
184 }
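
For a sense of scale, the arithmetic in init_frametable() works out as follows; the 32-byte sizeof(struct page_info) used here is an assumption for illustration, not a value taken from the headers:

    #include <stdio.h>

    #define PAGE_SHIFT          12   /* 4 KB pages      */
    #define L2_PAGETABLE_SHIFT  21   /* 2 MB superpages */
    #define PFN_UP(x)  (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)

    int main(void)
    {
        unsigned long max_page  = 1UL << 20;   /* assume 4 GB of RAM          */
        unsigned long entry_sz  = 32;          /* assumed sizeof(page_info)   */
        unsigned long nr_pages  = PFN_UP(max_page * entry_sz);
        unsigned long page_step = (1UL << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;

        printf("frame table: %lu pages (%lu MB), mapped %lu pages at a time\n",
               nr_pages, nr_pages >> (20 - PAGE_SHIFT), page_step);
        return 0;
    }
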
186 void arch_init_memory(void)
187 {
188 extern void subarch_init_memory(void);
190 unsigned long i, pfn, rstart_pfn, rend_pfn;
192 /*
193 * Initialise our DOMID_XEN domain.
194 * Any Xen-heap pages that we will allow to be mapped will have
195 * their domain field set to dom_xen.
196 */
197 dom_xen = alloc_domain(DOMID_XEN);
198 BUG_ON(dom_xen == NULL);
200 /*
201 * Initialise our DOMID_IO domain.
202 * This domain owns I/O pages that are within the range of the page_info
203 * array. Mappings occur at the priv of the caller.
204 */
205 dom_io = alloc_domain(DOMID_IO);
206 BUG_ON(dom_io == NULL);
208 /* First 1MB of RAM is historically marked as I/O. */
209 for ( i = 0; i < 0x100; i++ )
210 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);
212 /* Any areas not specified as RAM by the e820 map are considered I/O. */
213 for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
214 {
215 if ( e820.map[i].type != E820_RAM )
216 continue;
217 /* Every page from cursor to start of next RAM region is I/O. */
218 rstart_pfn = PFN_UP(e820.map[i].addr);
219 rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
220 for ( ; pfn < rstart_pfn; pfn++ )
221 {
222 BUG_ON(!mfn_valid(pfn));
223 share_xen_page_with_guest(
224 mfn_to_page(pfn), dom_io, XENSHARE_writable);
225 }
226 /* Skip the RAM region. */
227 pfn = rend_pfn;
228 }
229 BUG_ON(pfn != max_page);
231 subarch_init_memory();
232 }
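
The e820 walk in arch_init_memory() treats every gap between RAM regions as I/O and hands it to dom_io. A simplified standalone model of that walk follows; the e820 layout is made up, and the first-1MB special case and mfn_valid() checks are omitted:

    #include <stdio.h>

    struct e820entry { unsigned long long addr, size; int is_ram; };

    #define PAGE_SHIFT 12
    #define PFN_UP(x)   (((x) + 0xfffULL) >> PAGE_SHIFT)
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

    int main(void)
    {
        /* Invented layout: RAM below 640K and from 1MB to 256MB, hole between. */
        struct e820entry map[] = {
            { 0x00000000ULL, 0x0009f000ULL, 1 },
            { 0x0009f000ULL, 0x00061000ULL, 0 },   /* legacy I/O hole */
            { 0x00100000ULL, 0x0ff00000ULL, 1 },
        };
        unsigned long pfn = 0, max_page = PFN_DOWN(0x10000000ULL);

        for ( int i = 0; i < (int)(sizeof(map)/sizeof(map[0])); i++ )
        {
            if ( !map[i].is_ram )
                continue;
            unsigned long rstart = PFN_UP(map[i].addr);
            unsigned long rend   = PFN_DOWN(map[i].addr + map[i].size);
            if ( pfn < rstart )
                printf("pfns %#lx-%#lx -> dom_io\n", pfn, rstart - 1);
            pfn = rend;            /* skip over the RAM region itself */
        }
        if ( pfn < max_page )
            printf("pfns %#lx-%#lx -> dom_io\n", pfn, max_page - 1);
        return 0;
    }
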
234 int memory_is_conventional_ram(paddr_t p)
235 {
236 int i;
238 for ( i = 0; i < e820.nr_map; i++ )
239 {
240 if ( (e820.map[i].type == E820_RAM) &&
241 (e820.map[i].addr <= p) &&
242 (e820.map[i].size > p) )
243 return 1;
244 }
246 return 0;
247 }
249 void share_xen_page_with_guest(
250 struct page_info *page, struct domain *d, int readonly)
251 {
252 if ( page_get_owner(page) == d )
253 return;
255 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
257 spin_lock(&d->page_alloc_lock);
259 /* The incremented type count pins as writable or read-only. */
260 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
261 page->u.inuse.type_info |= PGT_validated | 1;
263 page_set_owner(page, d);
264 wmb(); /* install valid domain ptr before updating refcnt. */
265 ASSERT(page->count_info == 0);
267 /* Only add to the allocation list if the domain isn't dying. */
268 if ( !test_bit(_DOMF_dying, &d->domain_flags) )
269 {
270 page->count_info |= PGC_allocated | 1;
271 if ( unlikely(d->xenheap_pages++ == 0) )
272 get_knownalive_domain(d);
273 list_add_tail(&page->list, &d->xenpage_list);
274 }
276 spin_unlock(&d->page_alloc_lock);
277 }
279 void share_xen_page_with_privileged_guests(
280 struct page_info *page, int readonly)
281 {
282 share_xen_page_with_guest(page, dom_xen, readonly);
283 }
285 #if defined(CONFIG_X86_PAE)
287 #ifdef NDEBUG
288 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
289 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
290 #else
291 /*
292 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
293 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
294 * (detected by lack of an owning domain). As required for correctness, we
295 * always shadow PDPTs above 4GB.
296 */
297 #define l3tab_needs_shadow(mfn) \
298 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
299 (page_get_owner(mfn_to_page(mfn)) != NULL) && \
300 ((mfn) & 1)) || /* odd MFNs are shadowed */ \
301 ((mfn) >= 0x100000))
302 #endif
304 static l1_pgentry_t *fix_pae_highmem_pl1e;
306 /* Cache the address of PAE high-memory fixmap page tables. */
307 static int __init cache_pae_fixmap_address(void)
308 {
309 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
310 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
311 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
312 return 0;
313 }
314 __initcall(cache_pae_fixmap_address);
316 static DEFINE_PER_CPU(u32, make_cr3_timestamp);
318 void make_cr3(struct vcpu *v, unsigned long mfn)
319 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
320 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
321 {
322 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
323 struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
324 unsigned int cpu = smp_processor_id();
326 /* Fast path: does this mfn need a shadow at all? */
327 if ( !l3tab_needs_shadow(mfn) )
328 {
329 v->arch.cr3 = mfn << PAGE_SHIFT;
330 /* Cache is no longer in use or valid */
331 cache->high_mfn = 0;
332 return;
333 }
335 /* Caching logic is not interrupt safe. */
336 ASSERT(!in_irq());
338 /* Protects against pae_flush_pgd(). */
339 spin_lock(&cache->lock);
341 cache->inuse_idx ^= 1;
342 cache->high_mfn = mfn;
344 /* Map the guest L3 table and copy to the chosen low-memory cache. */
345 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
346 /* First check the previous high mapping can't be in the TLB.
347 * (i.e. have we loaded CR3 since we last did this?) */
348 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
349 local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
350 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
351 lowmem_l3tab = cache->table[cache->inuse_idx];
352 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
353 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
354 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
356 v->arch.cr3 = __pa(lowmem_l3tab);
358 spin_unlock(&cache->lock);
359 }
361 #else /* !CONFIG_X86_PAE */
363 void make_cr3(struct vcpu *v, unsigned long mfn)
364 {
365 v->arch.cr3 = mfn << PAGE_SHIFT;
366 }
368 #endif /* !CONFIG_X86_PAE */
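
make_cr3() keeps two low-memory copies of the guest's four-entry PAE L3 per vcpu and flips cache->inuse_idx on every call, so the copy the CPU may still be walking is never overwritten in place. A standalone model of that double-buffering (types and names invented for the sketch):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef uint64_t l3e_t;

    struct pae_l3_cache {
        l3e_t table[2][4];       /* two low-memory copies of the 4-entry L3 */
        unsigned int inuse_idx;  /* which copy CR3 currently points at      */
    };

    /* Copy the (possibly high-memory) guest L3 into the *unused* slot, then
     * make that slot current. The old slot stays intact because the CPU may
     * still walk it until the next CR3 load. */
    static l3e_t *shadow_l3(struct pae_l3_cache *c, const l3e_t guest_l3[4])
    {
        c->inuse_idx ^= 1;
        memcpy(c->table[c->inuse_idx], guest_l3, sizeof(c->table[0]));
        return c->table[c->inuse_idx];
    }

    int main(void)
    {
        struct pae_l3_cache cache = { .inuse_idx = 0 };
        l3e_t guest[4] = { 0x1001, 0x2001, 0x3001, 0x4001 };

        l3e_t *low = shadow_l3(&cache, guest);
        printf("using copy %u, entry0=%#llx\n",
               cache.inuse_idx, (unsigned long long)low[0]);

        guest[0] = 0x5001;                 /* guest rewrites its L3...        */
        low = shadow_l3(&cache, guest);    /* ...next CR3 load flips the copy */
        printf("using copy %u, entry0=%#llx\n",
               cache.inuse_idx, (unsigned long long)low[0]);
        return 0;
    }
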
370 void write_ptbase(struct vcpu *v)
371 {
372 write_cr3(v->arch.cr3);
373 }
375 /* Should be called after CR3 is updated.
376 * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
377 *
378 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
379 * for HVM guests, arch.monitor_table and hvm's guest CR3.
380 *
381 * Update ref counts to shadow tables appropriately.
382 */
383 void update_cr3(struct vcpu *v)
384 {
385 unsigned long cr3_mfn=0;
387 if ( paging_mode_enabled(v->domain) )
388 {
389 paging_update_cr3(v);
390 return;
391 }
393 #if CONFIG_PAGING_LEVELS == 4
394 if ( !(v->arch.flags & TF_kernel_mode) )
395 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
396 else
397 #endif
398 cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
400 make_cr3(v, cr3_mfn);
401 }
404 void invalidate_shadow_ldt(struct vcpu *v)
405 {
406 int i;
407 unsigned long pfn;
408 struct page_info *page;
410 if ( v->arch.shadow_ldt_mapcnt == 0 )
411 return;
413 v->arch.shadow_ldt_mapcnt = 0;
415 for ( i = 16; i < 32; i++ )
416 {
417 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
418 if ( pfn == 0 ) continue;
419 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
420 page = mfn_to_page(pfn);
421 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
422 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
423 put_page_and_type(page);
424 }
426 /* Dispose of the (now possibly invalid) mappings from the TLB. */
427 queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
428 }
431 static int alloc_segdesc_page(struct page_info *page)
432 {
433 struct desc_struct *descs;
434 int i;
436 descs = map_domain_page(page_to_mfn(page));
438 for ( i = 0; i < 512; i++ )
439 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
440 goto fail;
442 unmap_domain_page(descs);
443 return 1;
445 fail:
446 unmap_domain_page(descs);
447 return 0;
448 }
451 /* Map shadow page at offset @off. */
452 int map_ldt_shadow_page(unsigned int off)
453 {
454 struct vcpu *v = current;
455 struct domain *d = v->domain;
456 unsigned long gmfn, mfn;
457 l1_pgentry_t l1e, nl1e;
458 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
459 int okay;
461 BUG_ON(unlikely(in_irq()));
463 guest_get_eff_kern_l1e(v, gva, &l1e);
464 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
465 return 0;
467 gmfn = l1e_get_pfn(l1e);
468 mfn = gmfn_to_mfn(d, gmfn);
469 if ( unlikely(!mfn_valid(mfn)) )
470 return 0;
472 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
473 if ( unlikely(!okay) )
474 return 0;
476 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
478 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
479 v->arch.shadow_ldt_mapcnt++;
481 return 1;
482 }
485 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
486 {
487 struct page_info *page = mfn_to_page(page_nr);
489 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
490 {
491 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
492 return 0;
493 }
495 return 1;
496 }
499 static int get_page_and_type_from_pagenr(unsigned long page_nr,
500 unsigned long type,
501 struct domain *d)
502 {
503 struct page_info *page = mfn_to_page(page_nr);
505 if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
506 return 0;
508 if ( unlikely(!get_page_type(page, type)) )
509 {
510 put_page(page);
511 return 0;
512 }
514 return 1;
515 }
517 #ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
518 /*
519 * We allow root tables to map each other (a.k.a. linear page tables). It
520 * needs some special care with reference counts and access permissions:
521 * 1. The mapping entry must be read-only, or the guest may get write access
522 * to its own PTEs.
523 * 2. We must only bump the reference counts for an *already validated*
524 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
525 * on a validation that is required to complete that validation.
526 * 3. We only need to increment the reference counts for the mapped page
527 * frame if it is mapped by a different root table. This is sufficient and
528 * also necessary to allow validation of a root table mapping itself.
529 */
530 static int
531 get_linear_pagetable(
532 root_pgentry_t re, unsigned long re_pfn, struct domain *d)
533 {
534 unsigned long x, y;
535 struct page_info *page;
536 unsigned long pfn;
538 if ( (root_get_flags(re) & _PAGE_RW) )
539 {
540 MEM_LOG("Attempt to create linear p.t. with write perms");
541 return 0;
542 }
544 if ( (pfn = root_get_pfn(re)) != re_pfn )
545 {
546 /* Make sure the mapped frame belongs to the correct domain. */
547 if ( unlikely(!get_page_from_pagenr(pfn, d)) )
548 return 0;
550 /*
551 * Make sure that the mapped frame is an already-validated L2 table.
552 * If so, atomically increment the count (checking for overflow).
553 */
554 page = mfn_to_page(pfn);
555 y = page->u.inuse.type_info;
556 do {
557 x = y;
558 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
559 unlikely((x & (PGT_type_mask|PGT_validated)) !=
560 (PGT_root_page_table|PGT_validated)) )
561 {
562 put_page(page);
563 return 0;
564 }
565 }
566 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );
567 }
569 return 1;
570 }
571 #endif /* !CONFIG_X86_PAE */
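
The cmpxchg loop in get_linear_pagetable() only takes the extra type reference while the current type_info already reads "validated root table" and the count has room to grow. A standalone model of that conditional increment on a packed flags-plus-count word; the bit layout here is invented and Xen's real PGT_* masks differ:

    #include <stdio.h>

    /* Invented layout for the sketch: low 16 bits are the count. */
    #define CNT_MASK   0xffffUL
    #define F_ROOT     (1UL << 16)
    #define F_VALID    (1UL << 17)

    static int get_if_validated_root(volatile unsigned long *type_info)
    {
        unsigned long x, y = *type_info;
        do {
            x = y;
            if ( (x & CNT_MASK) == CNT_MASK )          /* count would overflow */
                return 0;
            if ( (x & (F_ROOT|F_VALID)) != (F_ROOT|F_VALID) )
                return 0;                              /* not a validated root */
            y = __sync_val_compare_and_swap(type_info, x, x + 1);
        } while ( y != x );                            /* raced: retry         */
        return 1;
    }

    int main(void)
    {
        unsigned long ti = F_ROOT | F_VALID | 2;       /* validated, count 2   */
        printf("take ref: %d, count now %lu\n",
               get_if_validated_root(&ti), ti & CNT_MASK);

        unsigned long not_root = F_VALID | 1;
        printf("take ref: %d\n", get_if_validated_root(&not_root));
        return 0;
    }
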
573 int
574 get_page_from_l1e(
575 l1_pgentry_t l1e, struct domain *d)
576 {
577 unsigned long mfn = l1e_get_pfn(l1e);
578 struct page_info *page = mfn_to_page(mfn);
579 int okay;
581 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
582 return 1;
584 if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
585 {
586 MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
587 return 0;
588 }
590 if ( unlikely(!mfn_valid(mfn)) ||
591 unlikely(page_get_owner(page) == dom_io) )
592 {
593 /* DOMID_IO reverts to caller for privilege checks. */
594 if ( d == dom_io )
595 d = current->domain;
597 if ( !iomem_access_permitted(d, mfn, mfn) )
598 {
599 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
600 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
601 d->domain_id, mfn);
602 return 0;
603 }
605 /* No reference counting for out-of-range I/O pages. */
606 if ( !mfn_valid(mfn) )
607 return 1;
609 d = dom_io;
610 }
612 /* Foreign mappings into guests in shadow external mode don't
613 * contribute to writeable mapping refcounts. (This allows the
614 * qemu-dm helper process in dom0 to map the domain's memory without
615 * messing up the count of "real" writable mappings.) */
616 okay = (((l1e_get_flags(l1e) & _PAGE_RW) &&
617 !(unlikely(paging_mode_external(d) && (d != current->domain))))
618 ? get_page_and_type(page, d, PGT_writable_page)
619 : get_page(page, d));
620 if ( !okay )
621 {
622 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
623 " for dom%d",
624 mfn, get_gpfn_from_mfn(mfn),
625 l1e_get_intpte(l1e), d->domain_id);
626 }
628 return okay;
629 }
632 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
633 static int
634 get_page_from_l2e(
635 l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
636 {
637 int rc;
639 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
640 return 1;
642 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
643 {
644 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
645 return 0;
646 }
648 rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
649 #if CONFIG_PAGING_LEVELS == 2
650 if ( unlikely(!rc) )
651 rc = get_linear_pagetable(l2e, pfn, d);
652 #endif
653 return rc;
654 }
657 #if CONFIG_PAGING_LEVELS >= 3
658 static int
659 get_page_from_l3e(
660 l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
661 {
662 int rc;
664 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
665 return 1;
667 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
668 {
669 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
670 return 0;
671 }
673 rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
674 return rc;
675 }
676 #endif /* 3 level */
678 #if CONFIG_PAGING_LEVELS >= 4
679 static int
680 get_page_from_l4e(
681 l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
682 {
683 int rc;
685 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
686 return 1;
688 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
689 {
690 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
691 return 0;
692 }
694 rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
696 if ( unlikely(!rc) )
697 rc = get_linear_pagetable(l4e, pfn, d);
699 return rc;
700 }
701 #endif /* 4 level */
703 #ifdef __x86_64__
705 #ifdef USER_MAPPINGS_ARE_GLOBAL
706 #define adjust_guest_l1e(pl1e, d) \
707 do { \
708 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
709 likely(!IS_COMPAT(d)) ) \
710 { \
711 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
712 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
713 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
714 MEM_LOG("Global bit is set to kernel page %lx", \
715 l1e_get_pfn((pl1e))); \
716 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
717 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
718 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
719 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
720 } \
721 } while ( 0 )
722 #else
723 #define adjust_guest_l1e(pl1e, d) \
724 do { \
725 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
726 likely(!IS_COMPAT(d)) ) \
727 l1e_add_flags((pl1e), _PAGE_USER); \
728 } while ( 0 )
729 #endif
731 #define adjust_guest_l2e(pl2e, d) \
732 do { \
733 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
734 likely(!IS_COMPAT(d)) ) \
735 l2e_add_flags((pl2e), _PAGE_USER); \
736 } while ( 0 )
738 #define adjust_guest_l3e(pl3e, d) \
739 do { \
740 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
741 l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \
742 _PAGE_USER : \
743 _PAGE_USER|_PAGE_RW); \
744 } while ( 0 )
746 #define adjust_guest_l4e(pl4e, d) \
747 do { \
748 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
749 likely(!IS_COMPAT(d)) ) \
750 l4e_add_flags((pl4e), _PAGE_USER); \
751 } while ( 0 )
753 #else /* !defined(__x86_64__) */
755 #define adjust_guest_l1e(_p, _d) ((void)(_d))
756 #define adjust_guest_l2e(_p, _d) ((void)(_d))
757 #define adjust_guest_l3e(_p, _d) ((void)(_d))
759 #endif
761 #ifdef CONFIG_COMPAT
762 #define unadjust_guest_l3e(pl3e, d) \
763 do { \
764 if ( unlikely(IS_COMPAT(d)) && \
765 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
766 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
767 } while ( 0 )
768 #else
769 #define unadjust_guest_l3e(_p, _d) ((void)(_d))
770 #endif
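
The adjust_guest_l*e macros above force _PAGE_USER onto present entries of 64-bit PV guests, whose kernels do not run in ring 0 and would otherwise fault on their own mappings. A minimal model of the simple (non-global) case, using the architectural flag values; the USER_MAPPINGS_ARE_GLOBAL juggling of _PAGE_GUEST_KERNEL and _PAGE_GLOBAL is left out:

    #include <stdint.h>
    #include <stdio.h>

    /* Architectural x86 PTE flag bits. */
    #define _PAGE_PRESENT  0x001ULL
    #define _PAGE_RW       0x002ULL
    #define _PAGE_USER     0x004ULL

    typedef uint64_t l1e_t;

    /* Model of the simple adjust_guest_l1e case: every present entry of a
     * non-compat PV guest gets _PAGE_USER added. */
    static l1e_t adjust_guest_l1e(l1e_t e)
    {
        if ( e & _PAGE_PRESENT )
            e |= _PAGE_USER;
        return e;
    }

    int main(void)
    {
        l1e_t e = 0x1000ULL | _PAGE_PRESENT | _PAGE_RW;   /* kernel r/w mapping */
        printf("before %#llx after %#llx\n",
               (unsigned long long)e, (unsigned long long)adjust_guest_l1e(e));
        return 0;
    }
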
772 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
773 {
774 unsigned long pfn = l1e_get_pfn(l1e);
775 struct page_info *page = mfn_to_page(pfn);
776 struct domain *e;
777 struct vcpu *v;
779 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) )
780 return;
782 e = page_get_owner(page);
784 /*
785 * Check if this is a mapping that was established via a grant reference.
786 * If it was then we should not be here: we require that such mappings are
787 * explicitly destroyed via the grant-table interface.
788 *
789 * The upshot of this is that the guest can end up with active grants that
790 * it cannot destroy (because it no longer has a PTE to present to the
791 * grant-table interface). This can lead to subtle hard-to-catch bugs,
792 * hence a special grant PTE flag can be enabled to catch the bug early.
793 *
794 * (Note that the undestroyable active grants are not a security hole in
795 * Xen. All active grants can safely be cleaned up when the domain dies.)
796 */
797 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
798 !(d->domain_flags & (DOMF_shutdown|DOMF_dying)) )
799 {
800 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
801 l1e_get_intpte(l1e));
802 domain_crash(d);
803 }
805 /* Remember we didn't take a type-count of foreign writable mappings
806 * to paging-external domains */
807 if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
808 !(unlikely((e != d) && paging_mode_external(e))) )
809 {
810 put_page_and_type(page);
811 }
812 else
813 {
814 /* We expect this is rare so we blow the entire shadow LDT. */
815 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
816 PGT_ldt_page)) &&
817 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
818 (d == e) )
819 {
820 for_each_vcpu ( d, v )
821 invalidate_shadow_ldt(v);
822 }
823 put_page(page);
824 }
825 }
828 /*
829 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
830 * Note also that this automatically deals correctly with linear p.t.'s.
831 */
832 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
833 {
834 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
835 (l2e_get_pfn(l2e) != pfn) )
836 put_page_and_type(l2e_get_page(l2e));
837 }
840 #if CONFIG_PAGING_LEVELS >= 3
841 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
842 {
843 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
844 (l3e_get_pfn(l3e) != pfn) )
845 put_page_and_type(l3e_get_page(l3e));
846 }
847 #endif
849 #if CONFIG_PAGING_LEVELS >= 4
850 static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
851 {
852 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
853 (l4e_get_pfn(l4e) != pfn) )
854 put_page_and_type(l4e_get_page(l4e));
855 }
856 #endif
858 static int alloc_l1_table(struct page_info *page)
859 {
860 struct domain *d = page_get_owner(page);
861 unsigned long pfn = page_to_mfn(page);
862 l1_pgentry_t *pl1e;
863 int i;
865 pl1e = map_domain_page(pfn);
867 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
868 {
869 if ( is_guest_l1_slot(i) &&
870 unlikely(!get_page_from_l1e(pl1e[i], d)) )
871 goto fail;
873 adjust_guest_l1e(pl1e[i], d);
874 }
876 unmap_domain_page(pl1e);
877 return 1;
879 fail:
880 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
881 while ( i-- > 0 )
882 if ( is_guest_l1_slot(i) )
883 put_page_from_l1e(pl1e[i], d);
885 unmap_domain_page(pl1e);
886 return 0;
887 }
889 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
890 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
891 {
892 struct page_info *page;
893 l2_pgentry_t *pl2e;
894 l3_pgentry_t l3e3;
895 #ifndef CONFIG_COMPAT
896 l2_pgentry_t l2e;
897 int i;
898 #else
900 if ( !IS_COMPAT(d) )
901 return 1;
902 #endif
904 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
906 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
907 l3e3 = pl3e[3];
908 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
909 {
910 MEM_LOG("PAE L3 3rd slot is empty");
911 return 0;
912 }
914 /*
915 * The Xen-private mappings include linear mappings. The L2 thus cannot
916 * be shared by multiple L3 tables. The test here is adequate because:
917 * 1. Cannot appear in slots != 3 because get_page_type() checks the
918 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
919 * 2. Cannot appear in another page table's L3:
920 * a. alloc_l3_table() calls this function and this check will fail
921 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
922 */
923 page = l3e_get_page(l3e3);
924 BUG_ON(page->u.inuse.type_info & PGT_pinned);
925 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
926 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
927 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
928 {
929 MEM_LOG("PAE L3 3rd slot is shared");
930 return 0;
931 }
933 /* Xen private mappings. */
934 pl2e = map_domain_page(l3e_get_pfn(l3e3));
935 #ifndef CONFIG_COMPAT
936 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
937 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
938 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
939 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
940 {
941 l2e = l2e_from_page(
942 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
943 __PAGE_HYPERVISOR);
944 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e);
945 }
946 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
947 {
948 l2e = l2e_empty();
949 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT )
950 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
951 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
952 }
953 #else
954 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
955 &compat_idle_pg_table_l2[
956 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
957 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
958 #endif
959 unmap_domain_page(pl2e);
961 return 1;
962 }
963 #else
964 # define create_pae_xen_mappings(d, pl3e) (1)
965 #endif
967 #ifdef CONFIG_X86_PAE
968 /* Flush a pgdir update into low-memory caches. */
969 static void pae_flush_pgd(
970 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
971 {
972 struct domain *d = page_get_owner(mfn_to_page(mfn));
973 struct vcpu *v;
974 intpte_t _ol3e, _nl3e, _pl3e;
975 l3_pgentry_t *l3tab_ptr;
976 struct pae_l3_cache *cache;
978 if ( unlikely(shadow_mode_enabled(d)) )
979 {
980 cpumask_t m = CPU_MASK_NONE;
981 /* Re-shadow this l3 table on any vcpus that are using it */
982 for_each_vcpu ( d, v )
983 if ( pagetable_get_pfn(v->arch.guest_table) == mfn )
984 {
985 paging_update_cr3(v);
986 cpus_or(m, m, v->vcpu_dirty_cpumask);
987 }
988 flush_tlb_mask(m);
989 }
991 /* If below 4GB then the pgdir is not shadowed in low memory. */
992 if ( !l3tab_needs_shadow(mfn) )
993 return;
995 for_each_vcpu ( d, v )
996 {
997 cache = &v->arch.pae_l3_cache;
999 spin_lock(&cache->lock);
1001 if ( cache->high_mfn == mfn )
1002 {
1003 l3tab_ptr = &cache->table[cache->inuse_idx][idx];
1004 _ol3e = l3e_get_intpte(*l3tab_ptr);
1005 _nl3e = l3e_get_intpte(nl3e);
1006 _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
1007 BUG_ON(_pl3e != _ol3e);
1008 }
1010 spin_unlock(&cache->lock);
1011 }
1013 flush_tlb_mask(d->domain_dirty_cpumask);
1014 }
1015 #else
1016 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
1017 #endif
1019 static int alloc_l2_table(struct page_info *page, unsigned long type)
1020 {
1021 struct domain *d = page_get_owner(page);
1022 unsigned long pfn = page_to_mfn(page);
1023 l2_pgentry_t *pl2e;
1024 int i;
1026 pl2e = map_domain_page(pfn);
1028 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1029 {
1030 if ( is_guest_l2_slot(d, type, i) &&
1031 unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
1032 goto fail;
1034 adjust_guest_l2e(pl2e[i], d);
1035 }
1037 #if CONFIG_PAGING_LEVELS == 2
1038 /* Xen private mappings. */
1039 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1040 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1041 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1042 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1043 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
1044 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1045 pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1046 l2e_from_page(
1047 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
1048 __PAGE_HYPERVISOR);
1049 #endif
1051 unmap_domain_page(pl2e);
1052 return 1;
1054 fail:
1055 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
1056 while ( i-- > 0 )
1057 if ( is_guest_l2_slot(d, type, i) )
1058 put_page_from_l2e(pl2e[i], pfn);
1060 unmap_domain_page(pl2e);
1061 return 0;
1062 }
1065 #if CONFIG_PAGING_LEVELS >= 3
1066 static int alloc_l3_table(struct page_info *page)
1068 struct domain *d = page_get_owner(page);
1069 unsigned long pfn = page_to_mfn(page);
1070 l3_pgentry_t *pl3e;
1071 int i;
1073 #ifdef CONFIG_X86_PAE
1074 /*
1075 * PAE pgdirs above 4GB are unacceptable if the guest does not understand
1076 * the weird 'extended cr3' format for dealing with high-order address
1077 * bits. We cut some slack for control tools (before vcpu0 is initialised).
1078 */
1079 if ( (pfn >= 0x100000) &&
1080 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
1081 d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
1083 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
1084 return 0;
1086 #endif
1088 pl3e = map_domain_page(pfn);
1090 /*
1091 * PAE guests allocate full pages, but aren't required to initialize
1092 * more than the first four entries; when running in compatibility
1093 * mode, however, the full page is visible to the MMU, and hence all
1094 * 512 entries must be valid/verified, which is most easily achieved
1095 * by clearing them out.
1096 */
1097 if ( IS_COMPAT(d) )
1098 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
1100 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1102 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1103 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
1105 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
1106 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
1107 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
1108 PGT_l2_page_table |
1109 PGT_pae_xen_l2,
1110 d) )
1111 goto fail;
1113 else
1114 #endif
1115 if ( is_guest_l3_slot(i) &&
1116 unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
1117 goto fail;
1119 adjust_guest_l3e(pl3e[i], d);
1122 if ( !create_pae_xen_mappings(d, pl3e) )
1123 goto fail;
1125 unmap_domain_page(pl3e);
1126 return 1;
1128 fail:
1129 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
1130 while ( i-- > 0 )
1131 if ( is_guest_l3_slot(i) )
1132 put_page_from_l3e(pl3e[i], pfn);
1134 unmap_domain_page(pl3e);
1135 return 0;
1137 #else
1138 #define alloc_l3_table(page) (0)
1139 #endif
1141 #if CONFIG_PAGING_LEVELS >= 4
1142 static int alloc_l4_table(struct page_info *page)
1144 struct domain *d = page_get_owner(page);
1145 unsigned long pfn = page_to_mfn(page);
1146 l4_pgentry_t *pl4e = page_to_virt(page);
1147 int i;
1149 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1151 if ( is_guest_l4_slot(d, i) &&
1152 unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
1153 goto fail;
1155 adjust_guest_l4e(pl4e[i], d);
1158 /* Xen private mappings. */
1159 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1160 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1161 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1162 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
1163 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
1164 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
1165 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
1166 __PAGE_HYPERVISOR);
1167 if ( IS_COMPAT(d) )
1168 pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
1169 l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
1170 __PAGE_HYPERVISOR);
1172 return 1;
1174 fail:
1175 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
1176 while ( i-- > 0 )
1177 if ( is_guest_l4_slot(d, i) )
1178 put_page_from_l4e(pl4e[i], pfn);
1180 return 0;
1182 #else
1183 #define alloc_l4_table(page) (0)
1184 #endif
1187 static void free_l1_table(struct page_info *page)
1188 {
1189 struct domain *d = page_get_owner(page);
1190 unsigned long pfn = page_to_mfn(page);
1191 l1_pgentry_t *pl1e;
1192 int i;
1194 pl1e = map_domain_page(pfn);
1196 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1197 if ( is_guest_l1_slot(i) )
1198 put_page_from_l1e(pl1e[i], d);
1200 unmap_domain_page(pl1e);
1201 }
1204 static void free_l2_table(struct page_info *page)
1206 #ifdef CONFIG_COMPAT
1207 struct domain *d = page_get_owner(page);
1208 #endif
1209 unsigned long pfn = page_to_mfn(page);
1210 l2_pgentry_t *pl2e;
1211 int i;
1213 pl2e = map_domain_page(pfn);
1215 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1216 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
1217 put_page_from_l2e(pl2e[i], pfn);
1219 unmap_domain_page(pl2e);
1221 page->u.inuse.type_info &= ~PGT_pae_xen_l2;
1225 #if CONFIG_PAGING_LEVELS >= 3
1227 static void free_l3_table(struct page_info *page)
1229 struct domain *d = page_get_owner(page);
1230 unsigned long pfn = page_to_mfn(page);
1231 l3_pgentry_t *pl3e;
1232 int i;
1234 pl3e = map_domain_page(pfn);
1236 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1237 if ( is_guest_l3_slot(i) )
1239 put_page_from_l3e(pl3e[i], pfn);
1240 unadjust_guest_l3e(pl3e[i], d);
1243 unmap_domain_page(pl3e);
1246 #endif
1248 #if CONFIG_PAGING_LEVELS >= 4
1250 static void free_l4_table(struct page_info *page)
1252 struct domain *d = page_get_owner(page);
1253 unsigned long pfn = page_to_mfn(page);
1254 l4_pgentry_t *pl4e = page_to_virt(page);
1255 int i;
1257 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1258 if ( is_guest_l4_slot(d, i) )
1259 put_page_from_l4e(pl4e[i], pfn);
1262 #endif
1265 /* How to write an entry to the guest pagetables.
1266 * Returns 0 for failure (pointer not valid), 1 for success. */
1267 static inline int update_intpte(intpte_t *p,
1268 intpte_t old,
1269 intpte_t new,
1270 unsigned long mfn,
1271 struct vcpu *v)
1272 {
1273 int rv = 1;
1274 #ifndef PTE_UPDATE_WITH_CMPXCHG
1275 rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
1276 #else
1277 {
1278 intpte_t t = old;
1279 for ( ; ; )
1280 {
1281 rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
1282 if ( unlikely(rv == 0) )
1283 {
1284 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
1285 ": saw %" PRIpte, old, new, t);
1286 break;
1287 }
1289 if ( t == old )
1290 break;
1292 /* Allowed to change in Accessed/Dirty flags only. */
1293 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
1295 old = t;
1296 }
1297 }
1298 #endif
1299 return rv;
1300 }
1302 /* Macro that wraps the appropriate type-changes around update_intpte().
1303 * Arguments are: type, ptr, old, new, mfn, vcpu */
1304 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \
1305 update_intpte((intpte_t *)(_p), \
1306 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
1307 (_m), (_v))
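
UPDATE_ENTRY relies on token pasting: the level prefix passed as _t is glued onto e_get_intpte to select l1e_get_intpte(), l2e_get_intpte() and so on. A tiny self-contained demonstration of the same ## trick with stand-in types and accessors:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t v; } l1_pgentry_t;
    typedef struct { uint64_t v; } l2_pgentry_t;

    /* Per-level accessors; shapes invented for the sketch. */
    static uint64_t l1e_get_intpte(l1_pgentry_t e) { return e.v; }
    static uint64_t l2e_get_intpte(l2_pgentry_t e) { return e.v; }

    /* Token pasting picks the right accessor from the level prefix. */
    #define GET_INTPTE(_t, _e)   _t ## e_get_intpte(_e)

    int main(void)
    {
        l1_pgentry_t e1 = { 0x1003 };
        l2_pgentry_t e2 = { 0x2003 };
        printf("%#llx %#llx\n",
               (unsigned long long)GET_INTPTE(l1, e1),
               (unsigned long long)GET_INTPTE(l2, e2));
        return 0;
    }
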
1309 /* Update the L1 entry at pl1e to new value nl1e. */
1310 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
1311 unsigned long gl1mfn)
1313 l1_pgentry_t ol1e;
1314 struct domain *d = current->domain;
1316 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
1317 return 0;
1319 if ( unlikely(paging_mode_refcounts(d)) )
1320 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1322 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
1324 /* Translate foreign guest addresses. */
1325 nl1e = l1e_from_pfn(gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e)),
1326 l1e_get_flags(nl1e));
1328 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
1330 MEM_LOG("Bad L1 flags %x",
1331 l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
1332 return 0;
1335 adjust_guest_l1e(nl1e, d);
1337 /* Fast path for identical mapping, r/w and presence. */
1338 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
1339 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1341 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
1342 return 0;
1344 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1346 put_page_from_l1e(nl1e, d);
1347 return 0;
1350 else
1352 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1353 return 0;
1356 put_page_from_l1e(ol1e, d);
1357 return 1;
1361 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
1362 static int mod_l2_entry(l2_pgentry_t *pl2e,
1363 l2_pgentry_t nl2e,
1364 unsigned long pfn,
1365 unsigned long type)
1367 l2_pgentry_t ol2e;
1368 struct domain *d = current->domain;
1370 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
1372 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
1373 return 0;
1376 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
1377 return 0;
1379 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
1381 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
1383 MEM_LOG("Bad L2 flags %x",
1384 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
1385 return 0;
1388 adjust_guest_l2e(nl2e, d);
1390 /* Fast path for identical mapping and presence. */
1391 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
1392 return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
1394 if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
1395 return 0;
1397 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1399 put_page_from_l2e(nl2e, pfn);
1400 return 0;
1403 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1405 return 0;
1408 put_page_from_l2e(ol2e, pfn);
1409 return 1;
1412 #if CONFIG_PAGING_LEVELS >= 3
1414 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
1415 static int mod_l3_entry(l3_pgentry_t *pl3e,
1416 l3_pgentry_t nl3e,
1417 unsigned long pfn)
1419 l3_pgentry_t ol3e;
1420 struct domain *d = current->domain;
1421 int okay;
1423 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
1425 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
1426 return 0;
1429 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1430 /*
1431 * Disallow updates to final L3 slot. It contains Xen mappings, and it
1432 * would be a pain to ensure they remain continuously valid throughout.
1433 */
1434 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
1435 pgentry_ptr_to_slot(pl3e) >= 3 )
1436 return 0;
1437 #endif
1439 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
1440 return 0;
1442 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
1444 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
1446 MEM_LOG("Bad L3 flags %x",
1447 l3e_get_flags(nl3e) & l3_disallow_mask(d));
1448 return 0;
1451 adjust_guest_l3e(nl3e, d);
1453 /* Fast path for identical mapping and presence. */
1454 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
1455 return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
1457 if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
1458 return 0;
1460 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1462 put_page_from_l3e(nl3e, pfn);
1463 return 0;
1466 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1468 return 0;
1471 okay = create_pae_xen_mappings(d, pl3e);
1472 BUG_ON(!okay);
1474 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
1476 put_page_from_l3e(ol3e, pfn);
1477 return 1;
1480 #endif
1482 #if CONFIG_PAGING_LEVELS >= 4
1484 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
1485 static int mod_l4_entry(struct domain *d,
1486 l4_pgentry_t *pl4e,
1487 l4_pgentry_t nl4e,
1488 unsigned long pfn)
1490 l4_pgentry_t ol4e;
1492 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
1494 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
1495 return 0;
1498 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
1499 return 0;
1501 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
1503 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
1505 MEM_LOG("Bad L4 flags %x",
1506 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
1507 return 0;
1510 adjust_guest_l4e(nl4e, current->domain);
1512 /* Fast path for identical mapping and presence. */
1513 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
1514 return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
1516 if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
1517 return 0;
1519 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1521 put_page_from_l4e(nl4e, pfn);
1522 return 0;
1525 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1527 return 0;
1530 put_page_from_l4e(ol4e, pfn);
1531 return 1;
1534 #endif
1536 int alloc_page_type(struct page_info *page, unsigned long type)
1538 struct domain *owner = page_get_owner(page);
1540 /* A page table is dirtied when its type count becomes non-zero. */
1541 if ( likely(owner != NULL) )
1542 mark_dirty(owner, page_to_mfn(page));
1544 switch ( type & PGT_type_mask )
1546 case PGT_l1_page_table:
1547 return alloc_l1_table(page);
1548 case PGT_l2_page_table:
1549 return alloc_l2_table(page, type);
1550 case PGT_l3_page_table:
1551 return alloc_l3_table(page);
1552 case PGT_l4_page_table:
1553 return alloc_l4_table(page);
1554 case PGT_gdt_page:
1555 case PGT_ldt_page:
1556 return alloc_segdesc_page(page);
1557 default:
1558 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
1559 type, page->u.inuse.type_info,
1560 page->count_info);
1561 BUG();
1564 return 0;
1568 void free_page_type(struct page_info *page, unsigned long type)
1570 struct domain *owner = page_get_owner(page);
1571 unsigned long gmfn;
1573 if ( likely(owner != NULL) )
1575 /*
1576 * We have to flush before the next use of the linear mapping
1577 * (e.g., update_va_mapping()) or we could end up modifying a page
1578 * that is no longer a page table (and hence screw up ref counts).
1579 */
1580 if ( current->domain == owner )
1581 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
1582 else
1583 flush_tlb_mask(owner->domain_dirty_cpumask);
1585 if ( unlikely(paging_mode_enabled(owner)) )
1587 /* A page table is dirtied when its type count becomes zero. */
1588 mark_dirty(owner, page_to_mfn(page));
1590 if ( shadow_mode_refcounts(owner) )
1591 return;
1593 gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
1594 ASSERT(VALID_M2P(gmfn));
1595 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
1599 switch ( type & PGT_type_mask )
1601 case PGT_l1_page_table:
1602 free_l1_table(page);
1603 break;
1605 case PGT_l2_page_table:
1606 free_l2_table(page);
1607 break;
1609 #if CONFIG_PAGING_LEVELS >= 3
1610 case PGT_l3_page_table:
1611 free_l3_table(page);
1612 break;
1613 #endif
1615 #if CONFIG_PAGING_LEVELS >= 4
1616 case PGT_l4_page_table:
1617 free_l4_table(page);
1618 break;
1619 #endif
1621 default:
1622 printk("%s: type %lx pfn %lx\n",__FUNCTION__,
1623 type, page_to_mfn(page));
1624 BUG();
1629 void put_page_type(struct page_info *page)
1631 unsigned long nx, x, y = page->u.inuse.type_info;
1633 again:
1634 do {
1635 x = y;
1636 nx = x - 1;
1638 ASSERT((x & PGT_count_mask) != 0);
1640 if ( unlikely((nx & PGT_count_mask) == 0) )
1642 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1643 likely(nx & PGT_validated) )
1645 /*
1646 * Page-table pages must be unvalidated when count is zero. The
1647 * 'free' is safe because the refcnt is non-zero and validated
1648 * bit is clear => other ops will spin or fail.
1649 */
1650 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1651 x & ~PGT_validated)) != x) )
1652 goto again;
1653 /* We cleared the 'valid bit' so we do the clean up. */
1654 free_page_type(page, x);
1655 /* Carry on, but with the 'valid bit' now clear. */
1656 x &= ~PGT_validated;
1657 nx &= ~PGT_validated;
1660 /*
1661 * Record TLB information for flush later. We do not stamp page
1662 * tables when running in shadow mode:
1663 * 1. Pointless, since it's the shadow pt's which must be tracked.
1664 * 2. Shadow mode reuses this field for shadowed page tables to
1665 * store flags info -- we don't want to conflict with that.
1666 */
1667 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
1668 (page->count_info & PGC_page_table)) )
1669 page->tlbflush_timestamp = tlbflush_current_time();
1672 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1676 int get_page_type(struct page_info *page, unsigned long type)
1678 unsigned long nx, x, y = page->u.inuse.type_info;
1680 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
1682 again:
1683 do {
1684 x = y;
1685 nx = x + 1;
1686 if ( unlikely((nx & PGT_count_mask) == 0) )
1688 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1689 return 0;
1691 else if ( unlikely((x & PGT_count_mask) == 0) )
1693 struct domain *d = page_get_owner(page);
1695 /* Never allow a shadowed frame to go from type count 0 to 1 */
1696 if ( d && shadow_mode_enabled(d) )
1697 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
1699 ASSERT(!(x & PGT_pae_xen_l2));
1700 if ( (x & PGT_type_mask) != type )
1702 /*
1703 * On type change we check to flush stale TLB entries. This
1704 * may be unnecessary (e.g., page was GDT/LDT) but those
1705 * circumstances should be very rare.
1706 */
1707 cpumask_t mask = d->domain_dirty_cpumask;
1709 /* Don't flush if the timestamp is old enough */
1710 tlbflush_filter(mask, page->tlbflush_timestamp);
1712 if ( unlikely(!cpus_empty(mask)) &&
1713 /* Shadow mode: track only writable pages. */
1714 (!shadow_mode_enabled(page_get_owner(page)) ||
1715 ((nx & PGT_type_mask) == PGT_writable_page)) )
1717 perfc_incrc(need_flush_tlb_flush);
1718 flush_tlb_mask(mask);
1721 /* We lose existing type, back pointer, and validity. */
1722 nx &= ~(PGT_type_mask | PGT_validated);
1723 nx |= type;
1725 /* No special validation needed for writable pages. */
1726 /* Page tables and GDT/LDT need to be scanned for validity. */
1727 if ( type == PGT_writable_page )
1728 nx |= PGT_validated;
1731 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
1733 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1734 (type != PGT_l1_page_table) )
1735 MEM_LOG("Bad type (saw %" PRtype_info
1736 " != exp %" PRtype_info ") "
1737 "for mfn %lx (pfn %lx)",
1738 x, type, page_to_mfn(page),
1739 get_gpfn_from_mfn(page_to_mfn(page)));
1740 return 0;
1742 else if ( unlikely(!(x & PGT_validated)) )
1744 /* Someone else is updating validation of this page. Wait... */
1745 while ( (y = page->u.inuse.type_info) == x )
1746 cpu_relax();
1747 goto again;
1750 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1752 if ( unlikely(!(nx & PGT_validated)) )
1754 /* Try to validate page type; drop the new reference on failure. */
1755 if ( unlikely(!alloc_page_type(page, type)) )
1757 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
1758 PRtype_info ": caf=%08x taf=%" PRtype_info,
1759 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1760 type, page->count_info, page->u.inuse.type_info);
1761 /* Noone else can get a reference. We hold the only ref. */
1762 page->u.inuse.type_info = 0;
1763 return 0;
1766 /* Noone else is updating simultaneously. */
1767 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1770 return 1;
1774 int new_guest_cr3(unsigned long mfn)
1776 struct vcpu *v = current;
1777 struct domain *d = v->domain;
1778 int okay;
1779 unsigned long old_base_mfn;
1781 #ifdef CONFIG_COMPAT
1782 if ( IS_COMPAT(d) )
1784 okay = paging_mode_refcounts(d)
1785 ? 0 /* Old code was broken, but what should it be? */
1786 : mod_l4_entry(
1787 d,
1788 __va(pagetable_get_paddr(v->arch.guest_table)),
1789 l4e_from_pfn(
1790 mfn,
1791 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
1792 pagetable_get_pfn(v->arch.guest_table));
1793 if ( unlikely(!okay) )
1795 MEM_LOG("Error while installing new compat baseptr %lx", mfn);
1796 return 0;
1799 invalidate_shadow_ldt(v);
1800 write_ptbase(v);
1802 return 1;
1804 #endif
1805 okay = paging_mode_refcounts(d)
1806 ? get_page_from_pagenr(mfn, d)
1807 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
1808 if ( unlikely(!okay) )
1810 MEM_LOG("Error while installing new baseptr %lx", mfn);
1811 return 0;
1814 invalidate_shadow_ldt(v);
1816 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1818 v->arch.guest_table = pagetable_from_pfn(mfn);
1819 update_cr3(v);
1821 write_ptbase(v);
1823 if ( likely(old_base_mfn != 0) )
1825 if ( paging_mode_refcounts(d) )
1826 put_page(mfn_to_page(old_base_mfn));
1827 else
1828 put_page_and_type(mfn_to_page(old_base_mfn));
1831 return 1;
1834 static void process_deferred_ops(void)
1836 unsigned int deferred_ops;
1837 struct domain *d = current->domain;
1838 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1840 deferred_ops = info->deferred_ops;
1841 info->deferred_ops = 0;
1843 if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
1845 if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
1846 flush_tlb_mask(d->domain_dirty_cpumask);
1847 else
1848 local_flush_tlb();
1851 if ( deferred_ops & DOP_RELOAD_LDT )
1852 (void)map_ldt_shadow_page(0);
1854 if ( unlikely(info->foreign != NULL) )
1856 put_domain(info->foreign);
1857 info->foreign = NULL;
1861 static int set_foreigndom(domid_t domid)
1863 struct domain *e, *d = current->domain;
1864 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1865 int okay = 1;
1867 ASSERT(info->foreign == NULL);
1869 if ( likely(domid == DOMID_SELF) )
1870 goto out;
1872 if ( unlikely(domid == d->domain_id) )
1874 MEM_LOG("Dom %u tried to specify itself as foreign domain",
1875 d->domain_id);
1876 okay = 0;
1878 else if ( unlikely(paging_mode_translate(d)) )
1880 MEM_LOG("Cannot mix foreign mappings with translated domains");
1881 okay = 0;
1883 else if ( !IS_PRIV(d) )
1885 switch ( domid )
1887 case DOMID_IO:
1888 get_knownalive_domain(dom_io);
1889 info->foreign = dom_io;
1890 break;
1891 default:
1892 MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
1893 okay = 0;
1894 break;
1897 else
1899 info->foreign = e = get_domain_by_id(domid);
1900 if ( e == NULL )
1902 switch ( domid )
1904 case DOMID_XEN:
1905 get_knownalive_domain(dom_xen);
1906 info->foreign = dom_xen;
1907 break;
1908 case DOMID_IO:
1909 get_knownalive_domain(dom_io);
1910 info->foreign = dom_io;
1911 break;
1912 default:
1913 MEM_LOG("Unknown domain '%u'", domid);
1914 okay = 0;
1915 break;
1920 out:
1921 return okay;
1924 static inline cpumask_t vcpumask_to_pcpumask(
1925 struct domain *d, unsigned long vmask)
1926 {
1927 unsigned int vcpu_id;
1928 cpumask_t pmask = CPU_MASK_NONE;
1929 struct vcpu *v;
1931 while ( vmask != 0 )
1932 {
1933 vcpu_id = find_first_set_bit(vmask);
1934 vmask &= ~(1UL << vcpu_id);
1935 if ( (vcpu_id < MAX_VIRT_CPUS) &&
1936 ((v = d->vcpu[vcpu_id]) != NULL) )
1937 cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
1938 }
1940 return pmask;
1941 }
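
vcpumask_to_pcpumask() peels one set bit at a time off the guest-supplied VCPU mask and ORs in that VCPU's dirty-CPU mask. A standalone model of the bit-peeling loop, with __builtin_ctzl standing in for find_first_set_bit and a made-up dirty-CPU table:

    #include <stdio.h>

    int main(void)
    {
        unsigned long vmask = 0x15;                 /* vcpus 0, 2 and 4        */
        /* Invented mapping from vcpu to the physical cpus it has dirtied. */
        unsigned long vcpu_dirty[8] = { 0x1, 0x2, 0xc, 0x0, 0x30, 0, 0, 0 };
        unsigned long pmask = 0;

        while ( vmask != 0 )
        {
            unsigned int vcpu_id = __builtin_ctzl(vmask);   /* lowest set bit  */
            vmask &= ~(1UL << vcpu_id);
            if ( vcpu_id < 8 )
                pmask |= vcpu_dirty[vcpu_id];
        }

        printf("pcpu mask %#lx\n", pmask);          /* 0x1 | 0xc | 0x30 = 0x3d */
        return 0;
    }
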
1943 int do_mmuext_op(
1944 XEN_GUEST_HANDLE(mmuext_op_t) uops,
1945 unsigned int count,
1946 XEN_GUEST_HANDLE(uint) pdone,
1947 unsigned int foreigndom)
1949 struct mmuext_op op;
1950 int rc = 0, i = 0, okay;
1951 unsigned long mfn = 0, gmfn = 0, type;
1952 unsigned int done = 0;
1953 struct page_info *page;
1954 struct vcpu *v = current;
1955 struct domain *d = v->domain;
1957 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1959 count &= ~MMU_UPDATE_PREEMPTED;
1960 if ( unlikely(!guest_handle_is_null(pdone)) )
1961 (void)copy_from_guest(&done, pdone, 1);
1964 if ( unlikely(!guest_handle_okay(uops, count)) )
1966 rc = -EFAULT;
1967 goto out;
1970 if ( !set_foreigndom(foreigndom) )
1972 rc = -ESRCH;
1973 goto out;
1976 LOCK_BIGLOCK(d);
1978 for ( i = 0; i < count; i++ )
1980 if ( hypercall_preempt_check() )
1982 rc = hypercall_create_continuation(
1983 __HYPERVISOR_mmuext_op, "hihi",
1984 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
1985 break;
1988 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
1990 MEM_LOG("Bad __copy_from_guest");
1991 rc = -EFAULT;
1992 break;
1995 okay = 1;
1996 gmfn = op.arg1.mfn;
1997 mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
1998 page = mfn_to_page(mfn);
2000 switch ( op.cmd )
2002 case MMUEXT_PIN_L1_TABLE:
2003 type = PGT_l1_page_table;
2004 goto pin_page;
2006 case MMUEXT_PIN_L2_TABLE:
2007 type = PGT_l2_page_table;
2008 goto pin_page;
2010 case MMUEXT_PIN_L3_TABLE:
2011 type = PGT_l3_page_table;
2012 goto pin_page;
2014 case MMUEXT_PIN_L4_TABLE:
2015 if ( IS_COMPAT(FOREIGNDOM) )
2016 break;
2017 type = PGT_l4_page_table;
2019 pin_page:
2020 /* Ignore pinning of invalid paging levels. */
2021 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
2022 break;
2024 if ( paging_mode_refcounts(FOREIGNDOM) )
2025 break;
2027 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
2028 if ( unlikely(!okay) )
2030 MEM_LOG("Error while pinning mfn %lx", mfn);
2031 break;
2034 if ( unlikely(test_and_set_bit(_PGT_pinned,
2035 &page->u.inuse.type_info)) )
2037 MEM_LOG("Mfn %lx already pinned", mfn);
2038 put_page_and_type(page);
2039 okay = 0;
2040 break;
2043 /* A page is dirtied when its pin status is set. */
2044 mark_dirty(d, mfn);
2046 break;
2048 case MMUEXT_UNPIN_TABLE:
2049 if ( paging_mode_refcounts(d) )
2050 break;
2052 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
2054 MEM_LOG("Mfn %lx bad domain (dom=%p)",
2055 mfn, page_get_owner(page));
2057 else if ( likely(test_and_clear_bit(_PGT_pinned,
2058 &page->u.inuse.type_info)) )
2060 put_page_and_type(page);
2061 put_page(page);
2062 /* A page is dirtied when its pin status is cleared. */
2063 mark_dirty(d, mfn);
2065 else
2067 okay = 0;
2068 put_page(page);
2069 MEM_LOG("Mfn %lx not pinned", mfn);
2071 break;
2073 case MMUEXT_NEW_BASEPTR:
2074 okay = new_guest_cr3(mfn);
2075 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
2076 break;
2078 #ifdef __x86_64__
2079 case MMUEXT_NEW_USER_BASEPTR: {
2080 unsigned long old_mfn;
2082 if ( mfn != 0 )
2084 if ( paging_mode_refcounts(d) )
2085 okay = get_page_from_pagenr(mfn, d);
2086 else
2087 okay = get_page_and_type_from_pagenr(
2088 mfn, PGT_root_page_table, d);
2089 if ( unlikely(!okay) )
2091 MEM_LOG("Error while installing new mfn %lx", mfn);
2092 break;
2096 old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
2097 v->arch.guest_table_user = pagetable_from_pfn(mfn);
2099 if ( old_mfn != 0 )
2101 if ( paging_mode_refcounts(d) )
2102 put_page(mfn_to_page(old_mfn));
2103 else
2104 put_page_and_type(mfn_to_page(old_mfn));
2107 break;
2109 #endif
2111 case MMUEXT_TLB_FLUSH_LOCAL:
2112 this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
2113 break;
2115 case MMUEXT_INVLPG_LOCAL:
2116 if ( !paging_mode_enabled(d)
2117 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
2118 local_flush_tlb_one(op.arg1.linear_addr);
2119 break;
2121 case MMUEXT_TLB_FLUSH_MULTI:
2122 case MMUEXT_INVLPG_MULTI:
2124 unsigned long vmask;
2125 cpumask_t pmask;
2126 if ( unlikely(copy_from_guest(&vmask, op.arg2.vcpumask, 1)) )
2128 okay = 0;
2129 break;
2131 pmask = vcpumask_to_pcpumask(d, vmask);
2132 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
2133 flush_tlb_mask(pmask);
2134 else
2135 flush_tlb_one_mask(pmask, op.arg1.linear_addr);
2136 break;
2139 case MMUEXT_TLB_FLUSH_ALL:
2140 flush_tlb_mask(d->domain_dirty_cpumask);
2141 break;
2143 case MMUEXT_INVLPG_ALL:
2144 flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
2145 break;
2147 case MMUEXT_FLUSH_CACHE:
2148 if ( unlikely(!cache_flush_permitted(d)) )
2150 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
2151 okay = 0;
2153 else
2155 wbinvd();
2157 break;
2159 case MMUEXT_SET_LDT:
2161 unsigned long ptr = op.arg1.linear_addr;
2162 unsigned long ents = op.arg2.nr_ents;
2164 if ( paging_mode_external(d) )
2166 MEM_LOG("ignoring SET_LDT hypercall from external "
2167 "domain %u", d->domain_id);
2168 okay = 0;
2170 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
2171 (ents > 8192) ||
2172 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
2174 okay = 0;
2175 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
2177 else if ( (v->arch.guest_context.ldt_ents != ents) ||
2178 (v->arch.guest_context.ldt_base != ptr) )
2180 invalidate_shadow_ldt(v);
2181 v->arch.guest_context.ldt_base = ptr;
2182 v->arch.guest_context.ldt_ents = ents;
2183 load_LDT(v);
2184 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
2185 if ( ents != 0 )
2186 this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
2188 break;
2191 default:
2192 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
2193 rc = -ENOSYS;
2194 okay = 0;
2195 break;
2198 if ( unlikely(!okay) )
2200 rc = rc ? rc : -EINVAL;
2201 break;
2204 guest_handle_add_offset(uops, 1);
2207 process_deferred_ops();
2209 UNLOCK_BIGLOCK(d);
2211 out:
2212 /* Add incremental work we have done to the @done output parameter. */
2213 if ( unlikely(!guest_handle_is_null(pdone)) )
2215 done += i;
2216 copy_to_guest(pdone, &done, 1);
2219 return rc;
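/*
 * [Illustrative sketch -- not part of mm.c.]  do_mmuext_op() (and
 * do_mmu_update() below) keep long batches restartable: on preemption after
 * i of count operations, the continuation re-issues the remaining count
 * tagged with MMU_UPDATE_PREEMPTED, and the work already completed is
 * accumulated through the pdone output handle.  The snippet below merely
 * exercises that encoding with plain integers; SKETCH_PREEMPTED plays the
 * role of MMU_UPDATE_PREEMPTED and its value is an assumption of the example.
 */
#include <assert.h>

#define SKETCH_PREEMPTED (1u << 31)   /* flag carried in the count argument */

struct sketch_batch {
    unsigned int count;   /* ops still to do (flag set while preempted)  */
    unsigned int done;    /* ops completed so far, as reported via pdone */
};

static struct sketch_batch sketch_preempt(unsigned int count, unsigned int i,
                                          unsigned int done_so_far)
{
    struct sketch_batch b;

    b.count = (count - i) | SKETCH_PREEMPTED;  /* what the continuation passes */
    b.done  = done_so_far + i;                 /* what gets copied to pdone    */
    return b;
}

static void sketch_resume(struct sketch_batch *b)
{
    /* Mirrors the "count &= ~MMU_UPDATE_PREEMPTED" prologue on re-entry. */
    b->count &= ~SKETCH_PREEMPTED;
}

static void sketch_batch_demo(void)
{
    /* A 100-op batch preempted after 40 ops, then resumed. */
    struct sketch_batch b = sketch_preempt(100, 40, 0);

    sketch_resume(&b);
    assert(b.count == 60 && b.done == 40);
}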
2222 int do_mmu_update(
2223 XEN_GUEST_HANDLE(mmu_update_t) ureqs,
2224 unsigned int count,
2225 XEN_GUEST_HANDLE(uint) pdone,
2226 unsigned int foreigndom)
2228 struct mmu_update req;
2229 void *va;
2230 unsigned long gpfn, gmfn, mfn;
2231 struct page_info *page;
2232 int rc = 0, okay = 1, i = 0;
2233 unsigned int cmd, done = 0;
2234 struct vcpu *v = current;
2235 struct domain *d = v->domain;
2236 unsigned long type_info;
2237 struct domain_mmap_cache mapcache, sh_mapcache;
2239 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
2241 count &= ~MMU_UPDATE_PREEMPTED;
2242 if ( unlikely(!guest_handle_is_null(pdone)) )
2243 (void)copy_from_guest(&done, pdone, 1);
2246 if ( unlikely(!guest_handle_okay(ureqs, count)) )
2248 rc = -EFAULT;
2249 goto out;
2252 if ( !set_foreigndom(foreigndom) )
2254 rc = -ESRCH;
2255 goto out;
2258 domain_mmap_cache_init(&mapcache);
2259 domain_mmap_cache_init(&sh_mapcache);
2261 perfc_incrc(calls_to_mmu_update);
2262 perfc_addc(num_page_updates, count);
2264 LOCK_BIGLOCK(d);
2266 for ( i = 0; i < count; i++ )
2268 if ( hypercall_preempt_check() )
2270 rc = hypercall_create_continuation(
2271 __HYPERVISOR_mmu_update, "hihi",
2272 ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
2273 break;
2276 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
2278 MEM_LOG("Bad __copy_from_guest");
2279 rc = -EFAULT;
2280 break;
2283 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
2284 okay = 0;
2286 switch ( cmd )
2288 /*
2289 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
2290 */
2291 case MMU_NORMAL_PT_UPDATE:
2293 gmfn = req.ptr >> PAGE_SHIFT;
2294 mfn = gmfn_to_mfn(d, gmfn);
2296 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2298 MEM_LOG("Could not get page for normal update");
2299 break;
2302 va = map_domain_page_with_cache(mfn, &mapcache);
2303 va = (void *)((unsigned long)va +
2304 (unsigned long)(req.ptr & ~PAGE_MASK));
2305 page = mfn_to_page(mfn);
2307 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
2309 case PGT_l1_page_table:
2310 case PGT_l2_page_table:
2311 case PGT_l3_page_table:
2312 case PGT_l4_page_table:
2314 if ( paging_mode_refcounts(d) )
2316 MEM_LOG("mmu update on auto-refcounted domain!");
2317 break;
2320 if ( unlikely(!get_page_type(
2321 page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
2322 goto not_a_pt;
2324 switch ( type_info & PGT_type_mask )
2326 case PGT_l1_page_table:
2328 l1_pgentry_t l1e = l1e_from_intpte(req.val);
2329 okay = mod_l1_entry(va, l1e, mfn);
2331 break;
2332 case PGT_l2_page_table:
2334 l2_pgentry_t l2e = l2e_from_intpte(req.val);
2335 okay = mod_l2_entry(va, l2e, mfn, type_info);
2337 break;
2338 #if CONFIG_PAGING_LEVELS >= 3
2339 case PGT_l3_page_table:
2341 l3_pgentry_t l3e = l3e_from_intpte(req.val);
2342 okay = mod_l3_entry(va, l3e, mfn);
2344 break;
2345 #endif
2346 #if CONFIG_PAGING_LEVELS >= 4
2347 case PGT_l4_page_table:
2349 l4_pgentry_t l4e = l4e_from_intpte(req.val);
2350 okay = mod_l4_entry(d, va, l4e, mfn);
2352 break;
2353 #endif
2356 put_page_type(page);
2358 break;
2360 default:
2361 not_a_pt:
2363 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2364 break;
2366 okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
2368 put_page_type(page);
2370 break;
2373 unmap_domain_page_with_cache(va, &mapcache);
2375 put_page(page);
2376 break;
2378 case MMU_MACHPHYS_UPDATE:
2380 mfn = req.ptr >> PAGE_SHIFT;
2381 gpfn = req.val;
2383 if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
2385 MEM_LOG("Could not get page for mach->phys update");
2386 break;
2389 if ( unlikely(paging_mode_translate(FOREIGNDOM)) )
2391 MEM_LOG("Mach-phys update on auto-translate guest");
2392 break;
2395 set_gpfn_from_mfn(mfn, gpfn);
2396 okay = 1;
2398 mark_dirty(FOREIGNDOM, mfn);
2400 put_page(mfn_to_page(mfn));
2401 break;
2403 default:
2404 MEM_LOG("Invalid page update command %x", cmd);
2405 rc = -ENOSYS;
2406 okay = 0;
2407 break;
2410 if ( unlikely(!okay) )
2412 rc = rc ? rc : -EINVAL;
2413 break;
2416 guest_handle_add_offset(ureqs, 1);
2419 domain_mmap_cache_destroy(&mapcache);
2420 domain_mmap_cache_destroy(&sh_mapcache);
2422 process_deferred_ops();
2424 UNLOCK_BIGLOCK(d);
2426 out:
2427 /* Add incremental work we have done to the @done output parameter. */
2428 if ( unlikely(!guest_handle_is_null(pdone)) )
2430 done += i;
2431 copy_to_guest(pdone, &done, 1);
2434 return rc;
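/*
 * [Illustrative sketch -- not part of mm.c.]  Each mmu_update request is a
 * (ptr, val) pair, and because ptr always refers to an entry aligned to
 * sizeof(l1_pgentry_t), its low bits are free to carry the sub-command
 * (cmd = req.ptr & (sizeof(l1_pgentry_t)-1) above).  The snippet below shows
 * that pack/unpack on a generic 8-byte-aligned machine address; the 8-byte
 * alignment is an assumption matching 64-bit PTEs, not a general rule.
 */
#include <stdint.h>
#include <assert.h>

#define SKETCH_ENTRY_ALIGN  8u                        /* sizeof(l1_pgentry_t) with 64-bit PTEs */
#define SKETCH_CMD_MASK     (SKETCH_ENTRY_ALIGN - 1)  /* low bits carry the command            */

static uint64_t sketch_pack_request(uint64_t entry_maddr, unsigned int cmd)
{
    assert( ((entry_maddr & SKETCH_CMD_MASK) == 0) && (cmd <= SKETCH_CMD_MASK) );
    return entry_maddr | cmd;
}

static void sketch_unpack_request(uint64_t ptr,
                                  uint64_t *entry_maddr, unsigned int *cmd)
{
    *cmd         = (unsigned int)(ptr & SKETCH_CMD_MASK);
    *entry_maddr = ptr & ~(uint64_t)SKETCH_CMD_MASK;
}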
2438 static int create_grant_pte_mapping(
2439 uint64_t pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
2441 int rc = GNTST_okay;
2442 void *va;
2443 unsigned long gmfn, mfn;
2444 struct page_info *page;
2445 u32 type;
2446 l1_pgentry_t ol1e;
2447 struct domain *d = v->domain;
2449 ASSERT(spin_is_locked(&d->big_lock));
2451 adjust_guest_l1e(nl1e, d);
2453 gmfn = pte_addr >> PAGE_SHIFT;
2454 mfn = gmfn_to_mfn(d, gmfn);
2456 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2458 MEM_LOG("Could not get page for normal update");
2459 return GNTST_general_error;
2462 va = map_domain_page(mfn);
2463 va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
2464 page = mfn_to_page(mfn);
2466 type = page->u.inuse.type_info & PGT_type_mask;
2467 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2469 MEM_LOG("Grant map attempted to update a non-L1 page");
2470 rc = GNTST_general_error;
2471 goto failed;
2474 ol1e = *(l1_pgentry_t *)va;
2475 if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
2477 put_page_type(page);
2478 rc = GNTST_general_error;
2479 goto failed;
2482 if ( !paging_mode_refcounts(d) )
2483 put_page_from_l1e(ol1e, d);
2485 put_page_type(page);
2487 failed:
2488 unmap_domain_page(va);
2489 put_page(page);
2491 return rc;
2494 static int destroy_grant_pte_mapping(
2495 uint64_t addr, unsigned long frame, struct domain *d)
2497 int rc = GNTST_okay;
2498 void *va;
2499 unsigned long gmfn, mfn;
2500 struct page_info *page;
2501 u32 type;
2502 l1_pgentry_t ol1e;
2504 gmfn = addr >> PAGE_SHIFT;
2505 mfn = gmfn_to_mfn(d, gmfn);
2507 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2509 MEM_LOG("Could not get page for normal update");
2510 return GNTST_general_error;
2513 va = map_domain_page(mfn);
2514 va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
2515 page = mfn_to_page(mfn);
2517 type = page->u.inuse.type_info & PGT_type_mask;
2518 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2520 MEM_LOG("Grant map attempted to update a non-L1 page");
2521 rc = GNTST_general_error;
2522 goto failed;
2525 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
2527 put_page_type(page);
2528 rc = GNTST_general_error;
2529 goto failed;
2532 /* Check that the virtual address supplied is actually mapped to frame. */
2533 if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
2535 MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
2536 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
2537 put_page_type(page);
2538 rc = GNTST_general_error;
2539 goto failed;
2542 /* Delete pagetable entry. */
2543 if ( unlikely(!UPDATE_ENTRY(l1,
2544 (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn,
2545 d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
2547 MEM_LOG("Cannot delete PTE entry at %p", va);
2548 put_page_type(page);
2549 rc = GNTST_general_error;
2550 goto failed;
2553 put_page_type(page);
2555 failed:
2556 unmap_domain_page(va);
2557 put_page(page);
2558 return rc;
2562 static int create_grant_va_mapping(
2563 unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
2565 l1_pgentry_t *pl1e, ol1e;
2566 struct domain *d = v->domain;
2567 unsigned long gl1mfn;
2568 int okay;
2570 ASSERT(spin_is_locked(&d->big_lock));
2572 adjust_guest_l1e(nl1e, d);
2574 pl1e = guest_map_l1e(v, va, &gl1mfn);
2575 if ( !pl1e )
2577 MEM_LOG("Could not find L1 PTE for address %lx", va);
2578 return GNTST_general_error;
2580 ol1e = *pl1e;
2581 okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
2582 guest_unmap_l1e(v, pl1e);
2583 pl1e = NULL;
2585 if ( !okay )
2586 return GNTST_general_error;
2588 if ( !paging_mode_refcounts(d) )
2589 put_page_from_l1e(ol1e, d);
2591 return GNTST_okay;
2594 static int destroy_grant_va_mapping(
2595 unsigned long addr, unsigned long frame, struct vcpu *v)
2597 l1_pgentry_t *pl1e, ol1e;
2598 unsigned long gl1mfn;
2599 int rc = 0;
2601 pl1e = guest_map_l1e(v, addr, &gl1mfn);
2602 if ( !pl1e )
2604 MEM_LOG("Could not find L1 PTE for address %lx", addr);
2605 return GNTST_general_error;
2607 ol1e = *pl1e;
2609 /* Check that the virtual address supplied is actually mapped to frame. */
2610 if ( unlikely(l1e_get_pfn(ol1e) != frame) )
2612 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2613 l1e_get_pfn(ol1e), addr, frame);
2614 rc = GNTST_general_error;
2615 goto out;
2618 /* Delete pagetable entry. */
2619 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
2621 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
2622 rc = GNTST_general_error;
2623 goto out;
2626 out:
2627 guest_unmap_l1e(v, pl1e);
2628 return rc;
2631 int create_grant_host_mapping(
2632 uint64_t addr, unsigned long frame, unsigned int flags)
2634 l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
2636 if ( (flags & GNTMAP_application_map) )
2637 l1e_add_flags(pte,_PAGE_USER);
2638 if ( !(flags & GNTMAP_readonly) )
2639 l1e_add_flags(pte,_PAGE_RW);
2641 if ( flags & GNTMAP_contains_pte )
2642 return create_grant_pte_mapping(addr, pte, current);
2643 return create_grant_va_mapping(addr, pte, current);
2646 int destroy_grant_host_mapping(
2647 uint64_t addr, unsigned long frame, unsigned int flags)
2649 if ( flags & GNTMAP_contains_pte )
2650 return destroy_grant_pte_mapping(addr, frame, current->domain);
2651 return destroy_grant_va_mapping(addr, frame, current);
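/*
 * [Illustrative sketch -- not part of mm.c.]  create_grant_host_mapping()
 * above turns the caller's GNTMAP_* flags into PTE permission bits: the
 * mapping is user-accessible only with GNTMAP_application_map, writable only
 * without GNTMAP_readonly, and GNTMAP_contains_pte selects whether addr is a
 * PTE machine address or a guest virtual address.  The flag values below are
 * placeholders chosen for the example, not the real Xen/x86 constants.
 */
#include <stdint.h>

#define SK_GNTMAP_readonly         (1u << 0)   /* placeholder flag values */
#define SK_GNTMAP_application_map  (1u << 1)

#define SK_PAGE_PRESENT  (1u << 0)             /* placeholder PTE bits */
#define SK_PAGE_RW       (1u << 1)
#define SK_PAGE_USER     (1u << 2)

static uint32_t sketch_grant_pte_flags(unsigned int gntmap_flags)
{
    uint32_t pte_flags = SK_PAGE_PRESENT;      /* stands in for GRANT_PTE_FLAGS */

    if ( gntmap_flags & SK_GNTMAP_application_map )
        pte_flags |= SK_PAGE_USER;             /* guest userspace may touch it */
    if ( !(gntmap_flags & SK_GNTMAP_readonly) )
        pte_flags |= SK_PAGE_RW;               /* writable unless readonly     */

    return pte_flags;
}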
2654 int steal_page(
2655 struct domain *d, struct page_info *page, unsigned int memflags)
2657 u32 _d, _nd, x, y;
2659 spin_lock(&d->page_alloc_lock);
2661 /*
2662 * The tricky bit: atomically release ownership while there is just one
2663 * benign reference to the page (PGC_allocated). If that reference
2664 * disappears then the deallocation routine will safely spin.
2665 */
2666 _d = pickle_domptr(d);
2667 _nd = page->u.inuse._domain;
2668 y = page->count_info;
2669 do {
2670 x = y;
2671 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
2672 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
2673 MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
2674 " caf=%08x, taf=%" PRtype_info "\n",
2675 (void *) page_to_mfn(page),
2676 d, d->domain_id, unpickle_domptr(_nd), x,
2677 page->u.inuse.type_info);
2678 spin_unlock(&d->page_alloc_lock);
2679 return -1;
2681 __asm__ __volatile__(
2682 LOCK_PREFIX "cmpxchg8b %2"
2683 : "=d" (_nd), "=a" (y),
2684 "=m" (*(volatile u64 *)(&page->count_info))
2685 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
2686 } while (unlikely(_nd != _d) || unlikely(y != x));
2688 /*
2689 * Unlink from 'd'. At least one reference remains (now anonymous), so
2690 * no one else is spinning to try to delete this page from 'd'.
2691 */
2692 if ( !(memflags & MEMF_no_refcount) )
2693 d->tot_pages--;
2694 list_del(&page->list);
2696 spin_unlock(&d->page_alloc_lock);
2698 return 0;
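/*
 * [Illustrative sketch -- not part of mm.c.]  steal_page() relies on one wide
 * compare-and-swap (the cmpxchg8b over the adjacent count_info/_domain words)
 * so that "the page still has exactly its one PGC_allocated reference" and
 * "the page still belongs to d" are checked, and the owner cleared, in a
 * single atomic step.  The snippet below expresses the same idea with a C11
 * 64-bit atomic holding a packed (owner, count) pair; the packing and the
 * SK_ALLOCATED flag are assumptions of the example, not Xen's layout.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define SK_ALLOCATED   (1u << 31)
#define SK_COUNT_MASK  0x7fffffffu

static inline uint64_t sk_pack_page_word(uint32_t owner, uint32_t count)
{
    return ((uint64_t)owner << 32) | count;
}

/* Returns true iff ownership was atomically released from 'owner'. */
static bool sketch_steal_page(_Atomic uint64_t *page_word, uint32_t owner)
{
    uint64_t old = atomic_load(page_word);

    do {
        uint32_t count     = (uint32_t)old;
        uint32_t cur_owner = (uint32_t)(old >> 32);

        /* Must still be owned by 'owner', with only the allocation reference. */
        if ( (cur_owner != owner) ||
             ((count & (SK_COUNT_MASK | SK_ALLOCATED)) != (1 | SK_ALLOCATED)) )
            return false;

        /* Keep the count, drop the owner (0 plays the role of NULL). */
    } while ( !atomic_compare_exchange_weak(page_word, &old,
                                            sk_pack_page_word(0, (uint32_t)old)) );

    return true;
}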
2701 int do_update_va_mapping(unsigned long va, u64 val64,
2702 unsigned long flags)
2704 l1_pgentry_t val = l1e_from_intpte(val64);
2705 struct vcpu *v = current;
2706 struct domain *d = v->domain;
2707 l1_pgentry_t *pl1e;
2708 unsigned long vmask, bmap_ptr, gl1mfn;
2709 cpumask_t pmask;
2710 int rc = 0;
2712 perfc_incrc(calls_to_update_va);
2714 if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) )
2715 return -EINVAL;
2717 LOCK_BIGLOCK(d);
2719 pl1e = guest_map_l1e(v, va, &gl1mfn);
2721 if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
2722 rc = -EINVAL;
2724 if ( pl1e )
2725 guest_unmap_l1e(v, pl1e);
2726 pl1e = NULL;
2728 switch ( flags & UVMF_FLUSHTYPE_MASK )
2730 case UVMF_TLB_FLUSH:
2731 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2733 case UVMF_LOCAL:
2734 local_flush_tlb();
2735 break;
2736 case UVMF_ALL:
2737 flush_tlb_mask(d->domain_dirty_cpumask);
2738 break;
2739 default:
2740 if ( unlikely(!IS_COMPAT(d) ?
2741 get_user(vmask, (unsigned long *)bmap_ptr) :
2742 get_user(vmask, (unsigned int *)bmap_ptr)) )
2743 rc = -EFAULT;
2744 pmask = vcpumask_to_pcpumask(d, vmask);
2745 flush_tlb_mask(pmask);
2746 break;
2748 break;
2750 case UVMF_INVLPG:
2751 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2753 case UVMF_LOCAL:
2754 if ( !paging_mode_enabled(d)
2755 || (paging_invlpg(current, va) != 0) )
2756 local_flush_tlb_one(va);
2757 break;
2758 case UVMF_ALL:
2759 flush_tlb_one_mask(d->domain_dirty_cpumask, va);
2760 break;
2761 default:
2762 if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
2763 rc = -EFAULT;
2764 pmask = vcpumask_to_pcpumask(d, vmask);
2765 flush_tlb_one_mask(pmask, va);
2766 break;
2768 break;
2771 process_deferred_ops();
2773 UNLOCK_BIGLOCK(d);
2775 return rc;
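/*
 * [Illustrative sketch -- not part of mm.c.]  The 'flags' word of
 * do_update_va_mapping() does double duty: its low bits pick the flush type
 * (none / full TLB flush / single-entry INVLPG), while the remaining bits are
 * either the LOCAL/ALL selectors or, in the multi-VCPU case, a guest pointer
 * to a VCPU bitmap (bmap_ptr above).  The decoder below mirrors that layout
 * with placeholder constants; the values are assumptions of the example, not
 * the real UVMF_* definitions.
 */
enum sk_flush_type  { SK_FLUSH_NONE = 0, SK_FLUSH_TLB = 1, SK_FLUSH_INVLPG = 2 };
enum sk_flush_scope { SK_SCOPE_LOCAL, SK_SCOPE_ALL, SK_SCOPE_VCPU_BITMAP };

#define SK_FLUSHTYPE_MASK  3ul
#define SK_LOCAL_BITS      0ul   /* placeholder for UVMF_LOCAL */
#define SK_ALL_BITS        4ul   /* placeholder for UVMF_ALL   */

struct sk_flush_request {
    enum sk_flush_type  type;
    enum sk_flush_scope scope;
    unsigned long       bitmap_ptr;   /* valid only for SK_SCOPE_VCPU_BITMAP */
};

static struct sk_flush_request sk_decode_uvmf(unsigned long flags)
{
    struct sk_flush_request r = { SK_FLUSH_NONE, SK_SCOPE_LOCAL, 0 };
    unsigned long rest = flags & ~SK_FLUSHTYPE_MASK;

    r.type = (enum sk_flush_type)(flags & SK_FLUSHTYPE_MASK);

    if ( rest == SK_LOCAL_BITS )
        r.scope = SK_SCOPE_LOCAL;
    else if ( rest == SK_ALL_BITS )
        r.scope = SK_SCOPE_ALL;
    else
    {
        r.scope      = SK_SCOPE_VCPU_BITMAP;  /* remaining bits are a guest pointer */
        r.bitmap_ptr = rest;
    }

    return r;
}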
2778 int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
2779 unsigned long flags,
2780 domid_t domid)
2782 int rc;
2784 if ( unlikely(!IS_PRIV(current->domain)) )
2785 return -EPERM;
2787 if ( !set_foreigndom(domid) )
2788 return -ESRCH;
2790 rc = do_update_va_mapping(va, val64, flags);
2792 return rc;
2797 /*************************
2798 * Descriptor Tables
2799 */
2801 void destroy_gdt(struct vcpu *v)
2803 int i;
2804 unsigned long pfn;
2806 v->arch.guest_context.gdt_ents = 0;
2807 for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
2809 if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
2810 put_page_and_type(mfn_to_page(pfn));
2811 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
2812 v->arch.guest_context.gdt_frames[i] = 0;
2817 long set_gdt(struct vcpu *v,
2818 unsigned long *frames,
2819 unsigned int entries)
2821 struct domain *d = v->domain;
2822 /* NB. There are 512 8-byte entries per GDT page. */
2823 int i, nr_pages = (entries + 511) / 512;
2824 unsigned long mfn;
2826 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2827 return -EINVAL;
2829 /* Check the pages in the new GDT. */
2830 for ( i = 0; i < nr_pages; i++ ) {
2831 mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
2832 if ( !mfn_valid(mfn) ||
2833 !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
2834 goto fail;
2837 /* Tear down the old GDT. */
2838 destroy_gdt(v);
2840 /* Install the new GDT. */
2841 v->arch.guest_context.gdt_ents = entries;
2842 for ( i = 0; i < nr_pages; i++ )
2844 v->arch.guest_context.gdt_frames[i] = frames[i];
2845 l1e_write(&v->arch.perdomain_ptes[i],
2846 l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
2849 return 0;
2851 fail:
2852 while ( i-- > 0 )
2853 put_page_and_type(mfn_to_page(frames[i]));
2854 return -EINVAL;
2858 long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
2860 int nr_pages = (entries + 511) / 512;
2861 unsigned long frames[16];
2862 long ret;
2864 /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */
2865 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2866 return -EINVAL;
2868 if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
2869 return -EFAULT;
2871 LOCK_BIGLOCK(current->domain);
2873 if ( (ret = set_gdt(current, frames, entries)) == 0 )
2874 local_flush_tlb();
2876 UNLOCK_BIGLOCK(current->domain);
2878 return ret;
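/*
 * [Illustrative sketch -- not part of mm.c.]  A 4KiB GDT frame holds 512
 * 8-byte descriptors, so set_gdt()/do_set_gdt() size the frame list as
 * (entries + 511) / 512 after capping 'entries'.  The helper below shows
 * that arithmetic; SK_MAX_GDT_ENTRIES merely stands in for
 * FIRST_RESERVED_GDT_ENTRY and its value here is an assumption.
 */
#define SK_DESC_BYTES      8u
#define SK_DESCS_PER_PAGE  (4096u / SK_DESC_BYTES)    /* 512 */
#define SK_MAX_GDT_ENTRIES (14u * SK_DESCS_PER_PAGE)  /* placeholder cap */

/* Number of GDT frames needed for 'entries' descriptors, or 0 if too many. */
static unsigned int sketch_gdt_frames(unsigned int entries)
{
    if ( entries > SK_MAX_GDT_ENTRIES )
        return 0;
    return (entries + SK_DESCS_PER_PAGE - 1) / SK_DESCS_PER_PAGE;
}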
2882 long do_update_descriptor(u64 pa, u64 desc)
2884 struct domain *dom = current->domain;
2885 unsigned long gmfn = pa >> PAGE_SHIFT;
2886 unsigned long mfn;
2887 unsigned int offset;
2888 struct desc_struct *gdt_pent, d;
2889 struct page_info *page;
2890 long ret = -EINVAL;
2892 offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
2894 *(u64 *)&d = desc;
2896 LOCK_BIGLOCK(dom);
2898 mfn = gmfn_to_mfn(dom, gmfn);
2899 if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
2900 !mfn_valid(mfn) ||
2901 !check_descriptor(dom, &d) )
2903 UNLOCK_BIGLOCK(dom);
2904 return -EINVAL;
2907 page = mfn_to_page(mfn);
2908 if ( unlikely(!get_page(page, dom)) )
2910 UNLOCK_BIGLOCK(dom);
2911 return -EINVAL;
2914 /* Check if the given frame is in use in an unsafe context. */
2915 switch ( page->u.inuse.type_info & PGT_type_mask )
2917 case PGT_gdt_page:
2918 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
2919 goto out;
2920 break;
2921 case PGT_ldt_page:
2922 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
2923 goto out;
2924 break;
2925 default:
2926 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2927 goto out;
2928 break;
2931 mark_dirty(dom, mfn);
2933 /* All is good so make the update. */
2934 gdt_pent = map_domain_page(mfn);
2935 memcpy(&gdt_pent[offset], &d, 8);
2936 unmap_domain_page(gdt_pent);
2938 put_page_type(page);
2940 ret = 0; /* success */
2942 out:
2943 put_page(page);
2945 UNLOCK_BIGLOCK(dom);
2947 return ret;
2950 typedef struct e820entry e820entry_t;
2951 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
2953 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
2955 switch ( op )
2957 case XENMEM_add_to_physmap:
2959 struct xen_add_to_physmap xatp;
2960 unsigned long prev_mfn, mfn = 0, gpfn;
2961 struct domain *d;
2963 if ( copy_from_guest(&xatp, arg, 1) )
2964 return -EFAULT;
2966 if ( xatp.domid == DOMID_SELF )
2967 d = rcu_lock_current_domain();
2968 else if ( !IS_PRIV(current->domain) )
2969 return -EPERM;
2970 else if ( (d = rcu_lock_domain_by_id(xatp.domid)) == NULL )
2971 return -ESRCH;
2973 switch ( xatp.space )
2975 case XENMAPSPACE_shared_info:
2976 if ( xatp.idx == 0 )
2977 mfn = virt_to_mfn(d->shared_info);
2978 break;
2979 case XENMAPSPACE_grant_table:
2980 spin_lock(&d->grant_table->lock);
2982 if ( (xatp.idx >= nr_grant_frames(d->grant_table)) &&
2983 (xatp.idx < max_nr_grant_frames) )
2984 gnttab_grow_table(d, xatp.idx + 1);
2986 if ( xatp.idx < nr_grant_frames(d->grant_table) )
2987 mfn = virt_to_mfn(d->grant_table->shared[xatp.idx]);
2989 spin_unlock(&d->grant_table->lock);
2990 break;
2991 default:
2992 break;
2995 if ( !paging_mode_translate(d) || (mfn == 0) )
2997 rcu_unlock_domain(d);
2998 return -EINVAL;
3001 LOCK_BIGLOCK(d);
3003 /* Remove previously mapped page if it was present. */
3004 prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
3005 if ( mfn_valid(prev_mfn) )
3007 if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
3008 /* Xen heap frames are simply unhooked from this phys slot. */
3009 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
3010 else
3011 /* Normal domain memory is freed, to avoid leaking memory. */
3012 guest_remove_page(d, xatp.gpfn);
3015 /* Unmap from old location, if any. */
3016 gpfn = get_gpfn_from_mfn(mfn);
3017 if ( gpfn != INVALID_M2P_ENTRY )
3018 guest_physmap_remove_page(d, gpfn, mfn);
3020 /* Map at new location. */
3021 guest_physmap_add_page(d, xatp.gpfn, mfn);
3023 UNLOCK_BIGLOCK(d);
3025 rcu_unlock_domain(d);
3027 break;
3030 case XENMEM_set_memory_map:
3032 struct xen_foreign_memory_map fmap;
3033 struct domain *d;
3034 int rc;
3036 if ( copy_from_guest(&fmap, arg, 1) )
3037 return -EFAULT;
3039 if ( fmap.map.nr_entries > ARRAY_SIZE(d->arch.e820) )
3040 return -EINVAL;
3042 if ( fmap.domid == DOMID_SELF )
3043 d = rcu_lock_current_domain();
3044 else if ( !IS_PRIV(current->domain) )
3045 return -EPERM;
3046 else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
3047 return -ESRCH;
3049 rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
3050 fmap.map.nr_entries) ? -EFAULT : 0;
3051 d->arch.nr_e820 = fmap.map.nr_entries;
3053 rcu_unlock_domain(d);
3054 return rc;
3057 case XENMEM_memory_map:
3059 struct xen_memory_map map;
3060 struct domain *d = current->domain;
3062 /* Backwards compatibility. */
3063 if ( d->arch.nr_e820 == 0 )
3064 return -ENOSYS;
3066 if ( copy_from_guest(&map, arg, 1) )
3067 return -EFAULT;
3069 map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
3070 if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
3071 copy_to_guest(arg, &map, 1) )
3072 return -EFAULT;
3074 return 0;
3077 case XENMEM_machine_memory_map:
3079 struct xen_memory_map memmap;
3080 XEN_GUEST_HANDLE(e820entry_t) buffer;
3081 int count;
3083 if ( !IS_PRIV(current->domain) )
3084 return -EINVAL;
3086 if ( copy_from_guest(&memmap, arg, 1) )
3087 return -EFAULT;
3088 if ( memmap.nr_entries < e820.nr_map + 1 )
3089 return -EINVAL;
3091 buffer = guest_handle_cast(memmap.buffer, e820entry_t);
3093 count = min((unsigned int)e820.nr_map, memmap.nr_entries);
3094 if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
3095 return -EFAULT;
3097 memmap.nr_entries = count;
3099 if ( copy_to_guest(arg, &memmap, 1) )
3100 return -EFAULT;
3102 return 0;
3105 case XENMEM_machphys_mapping:
3107 struct xen_machphys_mapping mapping = {
3108 .v_start = MACH2PHYS_VIRT_START,
3109 .v_end = MACH2PHYS_VIRT_END,
3110 .max_mfn = MACH2PHYS_NR_ENTRIES - 1
3111 };
3113 if ( copy_to_guest(arg, &mapping, 1) )
3114 return -EFAULT;
3116 return 0;
3119 default:
3120 return subarch_memory_op(op, arg);
3123 return 0;
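/*
 * [Illustrative sketch -- not part of mm.c.]  XENMEM_add_to_physmap above
 * moves an mfn to a new guest pfn in three steps: evict whatever currently
 * occupies the target gpfn, unhook the mfn from its old gpfn (if any), then
 * install the new translation.  The toy p2m/m2p arrays below capture only
 * that ordering; the sizes, the SK_INVALID marker, and the need to call
 * sk_physmap_init() first are assumptions of the example.
 */
#include <stddef.h>

#define SK_NR_GPFNS  64
#define SK_NR_MFNS   64
#define SK_INVALID   (~0ul)

static unsigned long sk_p2m[SK_NR_GPFNS];   /* gpfn -> mfn */
static unsigned long sk_m2p[SK_NR_MFNS];    /* mfn  -> gpfn */

static void sk_physmap_init(void)
{
    for ( size_t i = 0; i < SK_NR_GPFNS; i++ ) sk_p2m[i] = SK_INVALID;
    for ( size_t i = 0; i < SK_NR_MFNS;  i++ ) sk_m2p[i] = SK_INVALID;
}

static void sk_unmap_gpfn(unsigned long gpfn)
{
    unsigned long mfn = sk_p2m[gpfn];

    if ( mfn != SK_INVALID )
        sk_m2p[mfn] = SK_INVALID;
    sk_p2m[gpfn] = SK_INVALID;
}

static void sk_add_to_physmap(unsigned long gpfn, unsigned long mfn)
{
    /* 1. Remove whatever was previously mapped at the target gpfn. */
    sk_unmap_gpfn(gpfn);

    /* 2. Unmap the mfn from its old location, if it had one. */
    if ( sk_m2p[mfn] != SK_INVALID )
        sk_unmap_gpfn(sk_m2p[mfn]);

    /* 3. Map at the new location. */
    sk_p2m[gpfn] = mfn;
    sk_m2p[mfn]  = gpfn;
}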
3127 /*************************
3128 * Writable Pagetables
3129 */
3131 struct ptwr_emulate_ctxt {
3132 struct x86_emulate_ctxt ctxt;
3133 unsigned long cr2;
3134 l1_pgentry_t pte;
3135 };
3137 static int ptwr_emulated_read(
3138 enum x86_segment seg,
3139 unsigned long offset,
3140 unsigned long *val,
3141 unsigned int bytes,
3142 struct x86_emulate_ctxt *ctxt)
3144 unsigned int rc;
3145 unsigned long addr = offset;
3147 *val = 0;
3148 if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
3150 propagate_page_fault(addr + bytes - rc, 0); /* read fault */
3151 return X86EMUL_EXCEPTION;
3154 return X86EMUL_OKAY;
3157 static int ptwr_emulated_update(
3158 unsigned long addr,
3159 paddr_t old,
3160 paddr_t val,
3161 unsigned int bytes,
3162 unsigned int do_cmpxchg,
3163 struct ptwr_emulate_ctxt *ptwr_ctxt)
3165 unsigned long mfn;
3166 struct page_info *page;
3167 l1_pgentry_t pte, ol1e, nl1e, *pl1e;
3168 struct vcpu *v = current;
3169 struct domain *d = v->domain;
3171 /* Only allow naturally-aligned stores within the original %cr2 page. */
3172 if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) )
3174 MEM_LOG("Bad ptwr access (cr2=%lx, addr=%lx, bytes=%u)",
3175 ptwr_ctxt->cr2, addr, bytes);
3176 return X86EMUL_UNHANDLEABLE;
3179 /* Turn a sub-word access into a full-word access. */
3180 if ( bytes != sizeof(paddr_t) )
3182 paddr_t full;
3183 unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
3185 /* Align address; read full word. */
3186 addr &= ~(sizeof(paddr_t)-1);
3187 if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
3189 propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
3190 return X86EMUL_EXCEPTION;
3192 /* Mask out bits provided by caller. */
3193 full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
3194 /* Shift the caller value and OR in the missing bits. */
3195 val &= (((paddr_t)1 << (bytes*8)) - 1);
3196 val <<= (offset)*8;
3197 val |= full;
3198 /* Also fill in missing parts of the cmpxchg old value. */
3199 old &= (((paddr_t)1 << (bytes*8)) - 1);
3200 old <<= (offset)*8;
3201 old |= full;
3204 pte = ptwr_ctxt->pte;
3205 mfn = l1e_get_pfn(pte);
3206 page = mfn_to_page(mfn);
3208 /* We are looking only for read-only mappings of p.t. pages. */
3209 ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
3210 ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
3211 ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
3212 ASSERT(page_get_owner(page) == d);
3214 /* Check the new PTE. */
3215 nl1e = l1e_from_intpte(val);
3216 if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
3218 if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) &&
3219 (bytes == 4) && (addr & 4) && !do_cmpxchg &&
3220 (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
3222 /*
3223 * If this is an upper-half write to a PAE PTE then we assume that
3224 * the guest has simply got the two writes the wrong way round. We
3225 * zap the PRESENT bit on the assumption that the bottom half will
3226 * be written immediately after we return to the guest.
3227 */
3228 MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte,
3229 l1e_get_intpte(nl1e));
3230 l1e_remove_flags(nl1e, _PAGE_PRESENT);
3232 else
3234 MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
3235 return X86EMUL_UNHANDLEABLE;
3239 adjust_guest_l1e(nl1e, d);
3241 /* Checked successfully: do the update (write or cmpxchg). */
3242 pl1e = map_domain_page(page_to_mfn(page));
3243 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
3244 if ( do_cmpxchg )
3246 int okay;
3247 intpte_t t = old;
3248 ol1e = l1e_from_intpte(old);
3250 okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e,
3251 &t, val, _mfn(mfn));
3252 okay = (okay && t == old);
3254 if ( !okay )
3256 unmap_domain_page(pl1e);
3257 put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
3258 return X86EMUL_CMPXCHG_FAILED;
3261 else
3263 ol1e = *pl1e;
3264 if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
3265 BUG();
3268 unmap_domain_page(pl1e);
3270 /* Finally, drop the old PTE. */
3271 put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
3273 return X86EMUL_OKAY;
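/*
 * [Illustrative sketch -- not part of mm.c.]  When the emulated store covers
 * only part of a PTE, ptwr_emulated_update() widens it: it aligns the
 * address, reads the full word, clears the byte range being written, and ORs
 * in the caller's value shifted to its offset (the cmpxchg 'old' value is
 * widened the same way).  The pure function below performs that merge on a
 * 64-bit word so the arithmetic can be checked in isolation.
 */
#include <stdint.h>

/*
 * Merge 'bytes' bytes of 'val' into 'full' at byte offset 'offset'
 * (requires bytes < 8 and offset + bytes <= 8, as in the sub-word path).
 */
static uint64_t sketch_widen_write(uint64_t full, uint64_t val,
                                   unsigned int offset, unsigned int bytes)
{
    uint64_t mask = ((UINT64_C(1) << (bytes * 8)) - 1) << (offset * 8);

    full &= ~mask;                          /* drop the bytes being written */
    full |= (val << (offset * 8)) & mask;   /* splice in the new bytes      */
    return full;
}

/*
 * Example: sketch_widen_write(0x1111222233334444, 0xdeadbeef, 4, 4)
 * yields 0xdeadbeef33334444.
 */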
3276 static int ptwr_emulated_write(
3277 enum x86_segment seg,
3278 unsigned long offset,
3279 unsigned long val,
3280 unsigned int bytes,
3281 struct x86_emulate_ctxt *ctxt)
3283 return ptwr_emulated_update(
3284 offset, 0, val, bytes, 0,
3285 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3288 static int ptwr_emulated_cmpxchg(
3289 enum x86_segment seg,
3290 unsigned long offset,
3291 unsigned long old,
3292 unsigned long new,
3293 unsigned int bytes,
3294 struct x86_emulate_ctxt *ctxt)
3296 return ptwr_emulated_update(
3297 offset, old, new, bytes, 1,
3298 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3301 static int ptwr_emulated_cmpxchg8b(
3302 enum x86_segment seg,
3303 unsigned long offset,
3304 unsigned long old,
3305 unsigned long old_hi,
3306 unsigned long new,
3307 unsigned long new_hi,
3308 struct x86_emulate_ctxt *ctxt)
3310 if ( CONFIG_PAGING_LEVELS == 2 )
3311 return X86EMUL_UNHANDLEABLE;
3312 return ptwr_emulated_update(
3313 offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
3314 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3317 static struct x86_emulate_ops ptwr_emulate_ops = {
3318 .read = ptwr_emulated_read,
3319 .insn_fetch = ptwr_emulated_read,
3320 .write = ptwr_emulated_write,
3321 .cmpxchg = ptwr_emulated_cmpxchg,
3322 .cmpxchg8b = ptwr_emulated_cmpxchg8b
3323 };
3325 /* Write page fault handler: check if guest is trying to modify a PTE. */
3326 int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
3327 struct cpu_user_regs *regs)
3329 struct domain *d = v->domain;
3330 struct page_info *page;
3331 l1_pgentry_t pte;
3332 struct ptwr_emulate_ctxt ptwr_ctxt;
3333 int rc;
3335 LOCK_BIGLOCK(d);
3337 /*
3338 * Attempt to read the PTE that maps the VA being accessed. By checking for
3339 * PDE validity in the L2, we avoid many expensive fixups in __get_user().
3340 */
3341 guest_get_eff_l1e(v, addr, &pte);
3342 if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
3343 goto bail;
3344 page = l1e_get_page(pte);
3346 /* We are looking only for read-only mappings of p.t. pages. */
3347 if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
3348 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3349 ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
3350 (page_get_owner(page) != d) )
3351 goto bail;
3353 ptwr_ctxt.ctxt.regs = regs;
3354 ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
3355 IS_COMPAT(d) ? 32 : BITS_PER_LONG;
3356 ptwr_ctxt.cr2 = addr;
3357 ptwr_ctxt.pte = pte;
3359 rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
3360 if ( rc == X86EMUL_UNHANDLEABLE )
3361 goto bail;
3363 UNLOCK_BIGLOCK(d);
3364 perfc_incrc(ptwr_emulations);
3365 return EXCRET_fault_fixed;
3367 bail:
3368 UNLOCK_BIGLOCK(d);
3369 return 0;
3372 int map_pages_to_xen(
3373 unsigned long virt,
3374 unsigned long mfn,
3375 unsigned long nr_mfns,
3376 unsigned long flags)
3378 l2_pgentry_t *pl2e, ol2e;
3379 l1_pgentry_t *pl1e, ol1e;
3380 unsigned int i;
3382 unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
3383 flags &= ~MAP_SMALL_PAGES;
3385 while ( nr_mfns != 0 )
3387 pl2e = virt_to_xen_l2e(virt);
3389 if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
3390 (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
3391 !map_small_pages )
3393 /* Super-page mapping. */
3394 ol2e = *pl2e;
3395 l2e_write(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
3397 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
3399 local_flush_tlb_pge();
3400 if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
3401 free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
3404 virt += 1UL << L2_PAGETABLE_SHIFT;
3405 mfn += 1UL << PAGETABLE_ORDER;
3406 nr_mfns -= 1UL << PAGETABLE_ORDER;
3408 else
3410 /* Normal page mapping. */
3411 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
3413 pl1e = alloc_xen_pagetable();
3414 clear_page(pl1e);
3415 l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
3416 __PAGE_HYPERVISOR));
3418 else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
3420 pl1e = alloc_xen_pagetable();
3421 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3422 l1e_write(&pl1e[i],
3423 l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
3424 l2e_get_flags(*pl2e) & ~_PAGE_PSE));
3425 l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
3426 __PAGE_HYPERVISOR));
3427 local_flush_tlb_pge();
3430 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
3431 ol1e = *pl1e;
3432 l1e_write(pl1e, l1e_from_pfn(mfn, flags));
3433 if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
3434 local_flush_tlb_one(virt);
3436 virt += 1UL << L1_PAGETABLE_SHIFT;
3437 mfn += 1UL;
3438 nr_mfns -= 1UL;
3442 return 0;
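/*
 * [Illustrative sketch -- not part of mm.c.]  map_pages_to_xen() uses a 2MiB
 * superpage whenever both the virtual page number and the mfn are aligned to
 * a whole L2 entry (512 4KiB pages with PAGETABLE_ORDER == 9) and at least
 * that many pages remain; otherwise it falls back to 4KiB L1 mappings.  The
 * predicate below isolates that eligibility test.
 */
#include <stdbool.h>

#define SK_PAGE_SHIFT       12
#define SK_PAGETABLE_ORDER  9
#define SK_SUPERPAGE_PAGES  (1ul << SK_PAGETABLE_ORDER)   /* 512 */

static bool sketch_can_use_superpage(unsigned long virt, unsigned long mfn,
                                     unsigned long nr_mfns, bool small_only)
{
    unsigned long vpn = virt >> SK_PAGE_SHIFT;

    return !small_only &&
           (((vpn | mfn) & (SK_SUPERPAGE_PAGES - 1)) == 0) &&
           (nr_mfns >= SK_SUPERPAGE_PAGES);
}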
3445 void __set_fixmap(
3446 enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
3448 BUG_ON(idx >= __end_of_fixed_addresses);
3449 map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
3452 #ifdef MEMORY_GUARD
3454 void memguard_init(void)
3456 map_pages_to_xen(
3457 PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
3458 __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
3461 static void __memguard_change_range(void *p, unsigned long l, int guard)
3463 unsigned long _p = (unsigned long)p;
3464 unsigned long _l = (unsigned long)l;
3465 unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
3467 /* Ensure we are dealing with a page-aligned whole number of pages. */
3468 ASSERT((_p&PAGE_MASK) != 0);
3469 ASSERT((_l&PAGE_MASK) != 0);
3470 ASSERT((_p&~PAGE_MASK) == 0);
3471 ASSERT((_l&~PAGE_MASK) == 0);
3473 if ( guard )
3474 flags &= ~_PAGE_PRESENT;
3476 map_pages_to_xen(
3477 _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
3480 void memguard_guard_range(void *p, unsigned long l)
3482 __memguard_change_range(p, l, 1);
3485 void memguard_unguard_range(void *p, unsigned long l)
3487 __memguard_change_range(p, l, 0);
3490 #endif
3492 void memguard_guard_stack(void *p)
3494 BUILD_BUG_ON((DEBUG_STACK_SIZE + PAGE_SIZE) > STACK_SIZE);
3495 p = (void *)((unsigned long)p + STACK_SIZE - DEBUG_STACK_SIZE - PAGE_SIZE);
3496 memguard_guard_range(p, PAGE_SIZE);
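/*
 * [Illustrative sketch -- not part of mm.c.]  The memguard routines guard a
 * range by remapping it with _PAGE_PRESENT cleared, so any stray access
 * faults immediately; memguard_guard_stack() above carves one such guard page
 * out of the per-CPU stack area so overruns fault instead of corrupting
 * adjacent data.  As a loose userspace analogy (not the hypervisor
 * mechanism), the same effect can be had with mprotect():
 */
#include <sys/mman.h>

#define SK_PAGE_SIZE 4096ul

/* Make the lowest page of a page-aligned stack inaccessible; 0 on success. */
static int sketch_guard_stack_bottom(void *stack_base)
{
    return mprotect(stack_base, SK_PAGE_SIZE, PROT_NONE);
}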
3499 /*
3500 * Local variables:
3501 * mode: C
3502 * c-set-style: "BSD"
3503 * c-basic-offset: 4
3504 * tab-width: 4
3505 * indent-tabs-mode: nil
3506 * End:
3507 */